|
|
马上注册,结交更多好友,享用更多功能^_^
您需要 登录 才可以下载或查看,没有账号?立即注册
x
现在手上有一份 tmx 格式的文件(文件名.tmx),用编辑器打开时内容如下(见下方示例):
需要把它转换成 txt 文档:对每个 tu,把 ZH-CN 那个 tuv 里的中文放在前面,加上 ||| 这个符号,再把 EN-US 那个 tuv 里的英文跟在后面,合成一行(中文|||英文)。有时候文件很大,有几十万甚至上百万个 tuv,用 Python 要怎么写会比较好?
求高手指点。
<?xml version="1.0" ?>
<!DOCTYPE tmx SYSTEM "tmx14.dtd">
<tmx version="1.4">
<!--
  TMX header: file-level metadata. The attributes name the creating tool
  (TRADOS Translator's Workbench for Windows), declare sentence-level
  segmentation (segtype), ZH-CN as the source language (srclang), EN-US as
  the administrative language (adminlang) and rtf as the data type.
  The two prop children carry an RTF font table and an RTF style sheet;
  their text content is whitespace-sensitive and must not be reformatted.
-->
<header
creationtool="TRADOS Translator's Workbench for Windows"
creationtoolversion="Edition 8 Build 863"
segtype="sentence"
o-tmf="TW4Win 2.0 Format"
adminlang="EN-US"
srclang="ZH-CN"
datatype="rtf"
creationdate="20150827T031452Z"
creationid="1"
>
<prop type="RTFFontTable">
{\fonttbl
{\f1 \fmodern\fprq1 \fcharset134 \'d3\'d7\'d4\'b2;}
{\f2 \fswiss\fprq2 \fcharset0 Arial;}
{\f3 \fmodern\fprq1 \fcharset134 Arial Unicode MS;}
{\f4 \fbidi\froman\fprq2 {\*\panose 02020603050405020304}\fcharset0 Times New Roman;}
{\f5 \fbidi\fnil\fprq2 {\*\panose 02010600030101010101}{\*\falt SimSun}\fcharset134 \'cb\'ce\'cc\'e5;}
{\f6 \fbidi\froman\fprq2 {\*\panose 02040503050406030204}\fcharset0 Cambria Math;}
{\f7 \fbidi\fmodern\fprq1 {\*\panose 02010609060101010101}{\*\falt Arial Unicode MS}\fcharset134 \'b7\'c2\'cb\'ce;}
{\f8 \fbidi\fnil\fprq2 {\*\panose 02010600030101010101}\fcharset134 @\'cb\'ce\'cc\'e5;}
{\f9 \fbidi\fmodern\fprq1 \fcharset134 @\'b7\'c2\'cb\'ce;}
{\f10 \fhimajor\fbidi\froman\fprq2 {\*\panose 02040503050406030204}\fcharset0 Cambria;}
{\f11 \fhiminor\fbidi\fswiss\fprq2 {\*\panose 020f0502020204030204}\fcharset0 Calibri;}
{\f12 \fbidi\froman\fprq2 \fcharset238 Times New Roman CE;}
{\f13 \fbidi\froman\fprq2 \fcharset204 Times New Roman Cyr;}
{\f14 \fbidi\froman\fprq2 \fcharset161 Times New Roman Greek;}
{\f15 \fbidi\froman\fprq2 \fcharset162 Times New Roman Tur;}
{\f16 \fbidi\froman\fprq2 \fcharset177 Times New Roman (Hebrew);}
{\f17 \fbidi\froman\fprq2 \fcharset178 Times New Roman (Arabic);}
{\f18 \fbidi\froman\fprq2 \fcharset186 Times New Roman Baltic;}
{\f19 \fbidi\froman\fprq2 \fcharset163 Times New Roman (Vietnamese);}
{\f20 \fbidi\fnil\fprq2 {\*\falt SimSun}\fcharset0 SimSun Western;}
{\f21 \fbidi\froman\fprq2 \fcharset238 Cambria Math CE;}
{\f22 \fbidi\froman\fprq2 \fcharset204 Cambria Math Cyr;}
{\f23 \fbidi\froman\fprq2 \fcharset161 Cambria Math Greek;}
{\f24 \fbidi\froman\fprq2 \fcharset162 Cambria Math Tur;}
{\f25 \fbidi\froman\fprq2 \fcharset186 Cambria Math Baltic;}
{\f26 \fbidi\fmodern\fprq1 {\*\falt Arial Unicode MS}\fcharset0 \'b7\'c2\'cb\'ce Western;}
{\f27 \fbidi\fnil\fprq2 \fcharset0 @\'cb\'ce\'cc\'e5 Western;}
{\f28 \fbidi\fmodern\fprq1 \fcharset0 @\'b7\'c2\'cb\'ce Western;}
{\f29 \fhimajor\fbidi\froman\fprq2 \fcharset238 Cambria CE;}
{\f30 \fhimajor\fbidi\froman\fprq2 \fcharset204 Cambria Cyr;}
{\f31 \fhimajor\fbidi\froman\fprq2 \fcharset161 Cambria Greek;}
{\f32 \fhimajor\fbidi\froman\fprq2 \fcharset162 Cambria Tur;}
{\f33 \fhimajor\fbidi\froman\fprq2 \fcharset186 Cambria Baltic;}
{\f34 \fhiminor\fbidi\fswiss\fprq2 \fcharset238 Calibri CE;}
{\f35 \fhiminor\fbidi\fswiss\fprq2 \fcharset204 Calibri Cyr;}
{\f36 \fhiminor\fbidi\fswiss\fprq2 \fcharset161 Calibri Greek;}
{\f37 \fhiminor\fbidi\fswiss\fprq2 \fcharset162 Calibri Tur;}
{\f38 \fhiminor\fbidi\fswiss\fprq2 \fcharset186 Calibri Baltic;}
{\f39 \fbidi\fmodern\fprq1 {\*\panose 02070309020205020404}\fcharset0 Courier New;}
{\f40 \fbidi\fswiss\fprq2 \fcharset238 Arial CE;}
{\f41 \fbidi\fswiss\fprq2 \fcharset204 Arial Cyr;}
{\f42 \fbidi\fswiss\fprq2 \fcharset161 Arial Greek;}
{\f43 \fbidi\fswiss\fprq2 \fcharset162 Arial Tur;}
{\f44 \fbidi\fswiss\fprq2 \fcharset177 Arial (Hebrew);}
{\f45 \fbidi\fswiss\fprq2 \fcharset178 Arial (Arabic);}
{\f46 \fbidi\fswiss\fprq2 \fcharset186 Arial Baltic;}
{\f47 \fbidi\fswiss\fprq2 \fcharset163 Arial (Vietnamese);}
{\f48 \fbidi\fmodern\fprq1 \fcharset238 Courier New CE;}
{\f49 \fbidi\fmodern\fprq1 \fcharset204 Courier New Cyr;}
{\f50 \fbidi\fmodern\fprq1 \fcharset161 Courier New Greek;}
{\f51 \fbidi\fmodern\fprq1 \fcharset162 Courier New Tur;}
{\f52 \fbidi\fmodern\fprq1 \fcharset177 Courier New (Hebrew);}
{\f53 \fbidi\fmodern\fprq1 \fcharset178 Courier New (Arabic);}
{\f54 \fbidi\fmodern\fprq1 \fcharset186 Courier New Baltic;}
{\f55 \fbidi\fmodern\fprq1 \fcharset163 Courier New (Vietnamese);}}</prop>
<prop type="RTFStyleSheet">
{\stylesheet
{\St \s0 {\StN Normal}}
{\St \cs1 {\StB \v\f1\fs24\sub\cf12 }{\StN tw4winMark}}
{\St \cs2 {\StB \cf4\fs40\f1 }{\StN tw4winError}}
{\St \cs3 {\StB \f1\cf11\lang1024 }{\StN tw4winPopup}}
{\St \cs4 {\StB \f1\cf10\lang1024 }{\StN tw4winJump}}
{\St \cs5 {\StB \f1\cf15\lang1024 }{\StN tw4winExternal}}
{\St \cs6 {\StB \f1\cf6\lang1024 }{\StN tw4winInternal}}
{\St \cs7 {\StB \cf2 }{\StN tw4winTerm}}
{\St \cs8 {\StB \f1\cf13\lang1024 }{\StN DO_NOT_TRANSLATE}}
{\St \cs9 \additive\ssemihidden\sunhideused0\spriority1 {\StN Default Paragraph Font}}}</prop>
</header>
<body>
  <!--
    Translation units. Each tu holds one tuv per language (xml:lang ZH-CN
    and EN-US here); the seg child of each tuv carries the segment text.
    Only the element-only levels (tu, tuv) are indented; seg content is
    kept on a single line because whitespace inside seg is data.
  -->
  <tu creationdate="20150707T030952Z" creationid="T">
    <tuv xml:lang="ZH-CN">
      <seg>采伐面积:</seg>
    </tuv>
    <tuv xml:lang="EN-US">
      <seg>Felling Area:</seg>
    </tuv>
  </tu>
  <tu creationdate="20150707T030952Z" creationid="T">
    <tuv xml:lang="ZH-CN">
      <seg>__0.23__公顷(或采伐株数:</seg>
    </tuv>
    <tuv xml:lang="EN-US">
      <seg>__0.23__hectare(或Numbers of Trees:</seg>
    </tuv>
  </tu>
  <tu creationdate="20150707T030952Z" creationid="T">
    <tuv xml:lang="ZH-CN">
      <seg>采伐蓄积:___35.7__立方米</seg>
    </tuv>
    <tuv xml:lang="EN-US">
      <seg>Felling Volume:___35.7__cubic metres</seg>
    </tuv>
  </tu>
  <tu creationdate="20150707T030952Z" creationid="T">
    <tuv xml:lang="ZH-CN">
      <seg>自用材:______________立方米</seg>
    </tuv>
    <tuv xml:lang="EN-US">
      <seg>Self-used category:______________cubic metres</seg>
    </tuv>
  </tu>
  <tu creationdate="20150707T030952Z" creationid="T">
    <tuv xml:lang="ZH-CN">
      <seg>烧材:______________立方米</seg>
    </tuv>
    <tuv xml:lang="EN-US">
      <seg>Burnning-used category:______________cubic metres</seg>
    </tuv>
  </tu>
  <tu creationdate="20150707T030952Z" creationid="T">
    <tuv xml:lang="ZH-CN">
      <seg>采伐期限:自2013年03月27日至04月27日,更新期限:2014年04月27日</seg>
    </tuv>
    <tuv xml:lang="EN-US">
      <seg>Felling Period:From 27th March 2013 to 27th April 2013,Date of Update:27th April 2013</seg>
    </tuv>
  </tu>
  <tu creationdate="20150707T030952Z" creationid="T">
    <tuv xml:lang="ZH-CN">
      <seg>管理机关(章)</seg>
    </tuv>
    <tuv xml:lang="EN-US">
      <seg>Administrative Authority(stamp)</seg>
    </tuv>
  </tu>
  <tu creationdate="20150707T030952Z" creationid="T">
    <tuv xml:lang="ZH-CN">
      <seg>关键词:木薯收获技术,仿生式,拔起力,拔起速度,木薯茎秆夹具</seg>
    </tuv>
    <tuv xml:lang="EN-US">
      <seg>Keywords: cassava harvest technology, bionic, uprooting force, uprooting speed, cassava stem fixture</seg>
    </tuv>
  </tu>
</body>
</tmx>
|
|