CLEiM
Cross Lingual Education in Medicine
|
00001 00005 package com.uem.gsi.cleim.rpd; 00006 00007 import java.io.BufferedReader; 00008 import java.io.BufferedWriter; 00009 import java.io.FileOutputStream; 00010 import java.io.FileReader; 00011 import java.io.FileWriter; 00012 import java.io.IOException; 00013 import java.io.OutputStreamWriter; 00014 import java.io.PrintWriter; 00015 import java.io.Writer; 00016 import java.util.ArrayList; 00017 import java.util.List; 00018 00019 import org.w3c.dom.Document; 00020 import org.w3c.dom.Node; 00021 import org.w3c.dom.NodeList; 00022 00023 import com.uem.gsi.cleim.util.Constants; 00024 00025 import java.util.regex.*; 00026 public class SNOMEDToFile { 00027 /*private String sWebInf=""; 00028 private int count=0; 00029 private int countSpanish=0; 00030 private int countEnglish=0; 00031 private List<String> lTopicLang; 00032 private List<String> lID; 00033 private List<String> lTopicName; 00034 private List<String> lUrl; 00035 private List<String> lFullSummary; 00036 private List<String> lGroupNames; 00037 private List<ArrayList<String>> lSynonyms; 00038 private ArrayList<String> lSynonym; 00039 00040 public SNOMEDToFile(String pWebInf){ 00041 sWebInf=pWebInf; 00042 lTopicLang = new ArrayList<String>(); 00043 lID = new ArrayList<String>(); 00044 lTopicName = new ArrayList<String>(); 00045 lUrl = new ArrayList<String>(); 00046 lFullSummary = new ArrayList<String>(); 00047 lGroupNames = new ArrayList<String>(); 00048 lSynonyms = new ArrayList<ArrayList<String>>(); 00049 } 00050 00051 public void textToList(){ 00052 try { 00053 Document doc; 00054 //Local 00055 if (sWebInf!="") 00056 doc = MyDOMParser.getDocument(sWebInf+Constants.MLP_XML_VOCAB, 1); 00057 //Remote 00058 else 00059 doc = MyDOMParser.getDocument(Constants.MLP_XML_VOCAB, 0); 00060 this.count=new Integer(doc.getElementsByTagName("MedicalTopics").item(0).getAttributes().getNamedItem("total").getNodeValue()); 00061 this.countEnglish=new Integer(doc.getElementsByTagName("MedicalTopics").item(0).getAttributes().getNamedItem("totalEnglish").getNodeValue()); 00062 this.countSpanish=new Integer(doc.getElementsByTagName("MedicalTopics").item(0).getAttributes().getNamedItem("totalSpanish").getNodeValue()); 00063 00064 NodeList listRes=doc.getElementsByTagName("MedicalTopic"); 00065 String id="",name="",url="",fullsum="",groups=""; 00066 for (int j=0;j<listRes.getLength();j++){ 00067 lSynonym = new ArrayList<String>(); 00068 Node annotNode=listRes.item(j); 00069 //English or spanish 00070 lTopicLang.add(annotNode.getAttributes().getNamedItem("langcode").getNodeValue()); 00071 NodeList listChildAnnot=annotNode.getChildNodes(); 00072 for (int i=0;i<listChildAnnot.getLength();i++){ 00073 Node nodeContent=listChildAnnot.item(i); 00074 if (nodeContent.getNodeName().equals("ID")) 00075 id=nodeContent.getTextContent(); 00076 if (nodeContent.getNodeName().equals("MedicalTopicName")) 00077 name=nodeContent.getTextContent(); 00078 if (nodeContent.getNodeName().equals("URL")) 00079 url=nodeContent.getTextContent(); 00080 if (nodeContent.getNodeName().equals("FullSummary")) 00081 fullsum=nodeContent.getTextContent(); 00082 //Groups names 00083 if (nodeContent.getNodeName().equals("Groups")){ 00084 NodeList listGroups=nodeContent.getChildNodes(); 00085 groups=""; 00086 for (int k=0;k<listGroups.getLength();k++){ 00087 if (listGroups.item(k).getNodeType()==1) 00088 groups+=(groups.equals(""))? 00089 listGroups.item(k).getChildNodes().item(3).getTextContent(): 00090 " | "+listGroups.item(k).getChildNodes().item(3).getTextContent(); 00091 } 00092 } 00093 //Synonyms 00094 if (nodeContent.getNodeName().equals("Synonyms")){ 00095 NodeList listSynonyms=nodeContent.getChildNodes(); 00096 for (int k=0;k<listSynonyms.getLength();k++){ 00097 if (listSynonyms.item(k).getNodeType()==1) 00098 lSynonym.add(listSynonyms.item(k).getTextContent()); 00099 } 00100 } 00101 } 00102 lID.add(id); 00103 lTopicName.add(name); 00104 lUrl.add(url); 00105 lFullSummary.add(fullsum); 00106 lGroupNames.add(groups); 00107 lSynonyms.add(lSynonym); 00108 } 00109 } catch (Exception e) { 00110 System.out.println("Exception reading medlineplus xml: "+e.toString()); 00111 } 00112 } 00113 00114 private void loadListFileLan(String filePath,String lan){ 00115 try { 00116 Writer out = new BufferedWriter(new OutputStreamWriter( 00117 new FileOutputStream(filePath), "UTF8")); 00118 // Write english data 00119 for (int i=0;i<lID.size();i++){ 00120 if (lTopicLang.get(i).equals(lan)){ 00121 //out.write(lTopicName.get(i)+"&name="+lTopicName.get(i)+"&url="+lUrl.get(i)+"&groups="+lGroupNames.get(i)+"\n"); 00122 out.write(lTopicName.get(i)+"&url="+lUrl.get(i)+"&groups="+lGroupNames.get(i)+"\n"); 00123 for (int j=0;j<lSynonyms.get(i).size();j++){ 00124 //out.write(lSynonyms.get(i).get(j)+"&name="+lSynonyms.get(i).get(j)+"&url="+lUrl.get(i)+"&groups="+lGroupNames.get(i)+"\n"); 00125 out.write(lSynonyms.get(i).get(j)+"&url="+lUrl.get(i)+"&groups="+lGroupNames.get(i)+"\n"); 00126 00127 } 00128 } 00129 } 00130 out.close(); 00131 } catch (IOException e) { 00132 e.printStackTrace(); 00133 } 00134 } 00135 00136 public static void main(String[] args) { 00137 try{ 00138 // PUT HERE YOUR ABSOLUTE PATH IF YOU WANT LOCAL PROCESS, KEEP EMPTY IF NOT 00139 String xmlpath=""; 00140 // PUT HERE YOUR LOCAL WEB-INF PATH TO AUTOMATICALLY UPDATE GATE FILES 00141 String webinf="WebContent/WEB-INF"; 00142 SNOMEDToFile mlp=new SNOMEDToFile(xmlpath); 00143 System.out.println("READING XML FILES..."); 00144 mlp.xmlToList(); 00145 System.out.println("Topics number: "+mlp.count+ 00146 " In English: "+mlp.countEnglish+". In Spanish: "+mlp.countSpanish); 00147 for(int i=0; i<mlp.lID.size();i++){ 00148 System.out.println("lang: "+mlp.lTopicLang.get(i)); 00149 System.out.println("ID: "+mlp.lID.get(i)); 00150 System.out.println("Name: "+mlp.lTopicName.get(i)); 00151 System.out.println("Url: "+mlp.lUrl.get(i)); 00152 System.out.println("FullSummary: "+mlp.lFullSummary.get(i)); 00153 System.out.println("Groups: "+mlp.lGroupNames.get(i)); 00154 for (int j=0;j<mlp.lSynonyms.get(i).size();j++) 00155 System.out.println("Synonyms: "+mlp.lSynonyms.get(i).get(j)); 00156 } 00157 System.out.println("LOADING GATE LIST FILE..."); 00158 //TO LOAD DISJOINTED LANGUAGE FILES TO THE TERMS 00159 mlp.loadListFileLan(webinf+Constants.PATH_MLP_LST_EN, "English"); 00160 mlp.loadListFileLan(webinf+Constants.PATH_MLP_LST_SP, "Spanish"); 00161 //USE THIS TO LOAD ENGLISH AND SPANISH TERMS IN THE SAME LIST 00162 //mlp.loadListFile(Constants.PATH_MLP_LST); 00163 System.out.println("DONE"); 00164 00165 }catch(Exception e){ 00166 System.out.println("Error: "+e.toString()); 00167 } 00168 } 00169 */ 00170 public void replacePattern(String filePathInput, String filePathOutput, 00171 String sPattern, String sReplacement){ 00172 try { 00173 FileReader fr = new FileReader(filePathInput); 00174 BufferedReader br = new BufferedReader(fr); 00175 FileWriter fw = new FileWriter(filePathOutput); 00176 PrintWriter pw = new PrintWriter(fw); 00177 String line = null; 00178 00179 while((line=br.readLine()) != null) { 00180 line=line.replaceAll(sPattern, sReplacement); 00181 pw.write(line); 00182 } 00183 }catch(Exception e){ 00184 System.out.println("Exception: "+e.toString()); 00185 } 00186 } 00187 public static void main(String[] args) { 00188 00189 } 00190 }