CLEiM
Cross Lingual Education in Medicine
|
package com.uem.gsi.cleim.rpd;

import java.io.BufferedWriter;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.OutputStreamWriter;
import java.io.Writer;
import java.util.ArrayList;
import java.util.List;

import org.w3c.dom.Document;
import org.w3c.dom.Node;
import org.w3c.dom.NodeList;

import com.uem.gsi.cleim.util.Constants;
import com.uem.gsi.cleim.util.MyDOMParser;

/**
 * Reads the medical-topics vocabulary XML (root element {@code MedicalTopics},
 * one {@code MedicalTopic} element per topic) into parallel in-memory lists and
 * writes per-language term list files from them.
 *
 * <p>The lists {@code lTopicLang}, {@code lID}, {@code lTopicName}, {@code lUrl},
 * {@code lFullSummary}, {@code lGroupNames} and {@code lSynonyms} are parallel:
 * index {@code i} in each of them describes the same topic.
 */
public class MLPXMLToFile {
    /** Path prefix prepended to {@code Constants.MLP_XML_VOCAB}; empty means "read remotely". */
    private String sWebInf="";
    private int count=0;
    private int countSpanish=0;
    private int countEnglish=0;
    private List<String> lTopicLang;
    private List<String> lID;
    private List<String> lTopicName;
    private List<String> lUrl;
    private List<String> lFullSummary;
    private List<String> lGroupNames;
    private List<ArrayList<String>> lSynonyms;

    /**
     * Creates a reader with empty topic lists.
     *
     * @param pWebInf path prefix of the local vocabulary XML, or an empty string
     *                to make {@link #xmlToList()} use the remote source
     */
    public MLPXMLToFile(String pWebInf){
        sWebInf=pWebInf;
        lTopicLang = new ArrayList<String>();
        lID = new ArrayList<String>();
        lTopicName = new ArrayList<String>();
        lUrl = new ArrayList<String>();
        lFullSummary = new ArrayList<String>();
        lGroupNames = new ArrayList<String>();
        lSynonyms = new ArrayList<ArrayList<String>>();
    }

    /**
     * Parses the vocabulary XML and appends one entry per {@code MedicalTopic}
     * element to each of the parallel lists; also reads the {@code total},
     * {@code totalEnglish} and {@code totalSpanish} attributes of the root.
     *
     * <p>Any exception is reported on standard output and parsing stops; topics
     * that were completely parsed before the failure remain in the lists.
     */
    public void xmlToList(){
        try {
            Document doc;
            // Compare by content, not by reference: a non-literal empty string
            // must also select the remote source.
            if (sWebInf != null && sWebInf.length() > 0) {
                // Local (mode flag 1)
                doc = MyDOMParser.getDocument(sWebInf+Constants.MLP_XML_VOCAB, 1);
            } else {
                // Remote (mode flag 0)
                doc = MyDOMParser.getDocument(Constants.MLP_XML_VOCAB, 0);
            }

            Node root = doc.getElementsByTagName("MedicalTopics").item(0);
            this.count = readIntAttribute(root, "total");
            this.countEnglish = readIntAttribute(root, "totalEnglish");
            this.countSpanish = readIntAttribute(root, "totalSpanish");

            NodeList topics = doc.getElementsByTagName("MedicalTopic");
            for (int j = 0; j < topics.getLength(); j++) {
                Node topic = topics.item(j);
                // English or Spanish
                String lang = topic.getAttributes().getNamedItem("langcode").getNodeValue();

                // Reset per topic so a missing child element yields an empty
                // value instead of the previous topic's value.
                String id = "";
                String name = "";
                String url = "";
                String fullsum = "";
                String groups = "";
                ArrayList<String> synonyms = new ArrayList<String>();

                NodeList children = topic.getChildNodes();
                for (int i = 0; i < children.getLength(); i++) {
                    Node child = children.item(i);
                    String tag = child.getNodeName();
                    if (tag.equals("ID")) {
                        id = child.getTextContent();
                    } else if (tag.equals("MedicalTopicName")) {
                        name = child.getTextContent();
                    } else if (tag.equals("URL")) {
                        url = child.getTextContent();
                    } else if (tag.equals("FullSummary")) {
                        fullsum = child.getTextContent();
                    } else if (tag.equals("Groups")) {
                        groups = joinGroupNames(child);
                    } else if (tag.equals("Synonyms")) {
                        collectSynonyms(child, synonyms);
                    }
                }

                // Append to every list together so the parallel lists never
                // get out of step if parsing a topic fails half-way.
                lTopicLang.add(lang);
                lID.add(id);
                lTopicName.add(name);
                lUrl.add(url);
                lFullSummary.add(fullsum);
                lGroupNames.add(groups);
                lSynonyms.add(synonyms);
            }
        } catch (Exception e) {
            System.out.println("Exception reading medlineplus xml: "+e.toString());
        }
    }

    /** Returns the named attribute of {@code node} parsed as a decimal int. */
    private static int readIntAttribute(Node node, String attribute) {
        return Integer.parseInt(node.getAttributes().getNamedItem(attribute).getNodeValue());
    }

    /**
     * Joins the names of all element children of a {@code Groups} node with
     * {@code " | "}.
     */
    private static String joinGroupNames(Node groupsNode) {
        StringBuilder joined = new StringBuilder();
        NodeList groupNodes = groupsNode.getChildNodes();
        for (int k = 0; k < groupNodes.getLength(); k++) {
            Node group = groupNodes.item(k);
            if (group.getNodeType() != Node.ELEMENT_NODE) {
                continue;
            }
            // NOTE(review): the name is taken from child index 3, which presumably
            // relies on the exact whitespace/element layout of the XML - confirm.
            String groupName = group.getChildNodes().item(3).getTextContent();
            if (joined.length() > 0) {
                joined.append(" | ");
            }
            joined.append(groupName);
        }
        return joined.toString();
    }

    /** Adds the text of every element child of a {@code Synonyms} node to {@code target}. */
    private static void collectSynonyms(Node synonymsNode, List<String> target) {
        NodeList synonymNodes = synonymsNode.getChildNodes();
        for (int k = 0; k < synonymNodes.getLength(); k++) {
            Node synonym = synonymNodes.item(k);
            if (synonym.getNodeType() == Node.ELEMENT_NODE) {
                target.add(synonym.getTextContent());
            }
        }
    }

    /**
     * Writes, in UTF-8, one line per topic name and per synonym for every topic
     * whose language code equals {@code lan}. Each line has the form
     * {@code term&url=...&groups=...}. I/O failures are printed, not thrown.
     *
     * @param filePath destination file; it is overwritten
     * @param lan      language code to select, compared with {@code equals}
     */
    private void loadListFileLan(String filePath,String lan){
        Writer out = null;
        try {
            out = new BufferedWriter(new OutputStreamWriter(
                    new FileOutputStream(filePath), "UTF8"));
            for (int i = 0; i < lID.size(); i++) {
                if (!lTopicLang.get(i).equals(lan)) {
                    continue;
                }
                String suffix = "&url="+lUrl.get(i)+"&groups="+lGroupNames.get(i)+"\n";
                out.write(lTopicName.get(i)+suffix);
                // Synonyms share the URL and groups of their topic.
                List<String> synonyms = lSynonyms.get(i);
                for (int j = 0; j < synonyms.size(); j++) {
                    out.write(synonyms.get(j)+suffix);
                }
            }
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Close on every path so a failed write does not leak the file handle.
            if (out != null) {
                try {
                    out.close();
                } catch (IOException e) {
                    e.printStackTrace();
                }
            }
        }
    }

    /**
     * Command-line entry point: parses the vocabulary, dumps every topic to
     * standard output and writes the English and Spanish list files.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        try{
            // PUT HERE YOUR ABSOLUTE PATH IF YOU WANT LOCAL PROCESS, KEEP EMPTY IF NOT
            String xmlpath="";
            // PUT HERE YOUR LOCAL WEB-INF PATH TO AUTOMATICALLY UPDATE GATE FILES
            String webinf="WebContent/WEB-INF";
            MLPXMLToFile mlp=new MLPXMLToFile(xmlpath);
            System.out.println("READING XML FILES...");
            mlp.xmlToList();
            System.out.println("Topics number: "+mlp.count+
                    " In English: "+mlp.countEnglish+". In Spanish: "+mlp.countSpanish);
            for (int i = 0; i < mlp.lID.size(); i++) {
                System.out.println("lang: "+mlp.lTopicLang.get(i));
                System.out.println("ID: "+mlp.lID.get(i));
                System.out.println("Name: "+mlp.lTopicName.get(i));
                System.out.println("Url: "+mlp.lUrl.get(i));
                System.out.println("FullSummary: "+mlp.lFullSummary.get(i));
                System.out.println("Groups: "+mlp.lGroupNames.get(i));
                List<String> synonyms = mlp.lSynonyms.get(i);
                for (int j = 0; j < synonyms.size(); j++) {
                    System.out.println("Synonyms: "+synonyms.get(j));
                }
            }
            System.out.println("LOADING GATE LIST FILE...");
            // One list file per language.
            mlp.loadListFileLan(webinf+Constants.PATH_MLP_LST_EN, "English");
            mlp.loadListFileLan(webinf+Constants.PATH_MLP_LST_SP, "Spanish");
            System.out.println("DONE");

        }catch(Exception e){
            System.out.println("Error: "+e.toString());
        }
    }

}