CLEiM
Cross Lingual Education in Medicine
 All Classes Namespaces Files Functions Variables
MLPXMLToFile.java
Go to the documentation of this file.
00001 
00005 package com.uem.gsi.cleim.rpd;
00006 
00007 import java.io.BufferedWriter;
00008 import java.io.FileOutputStream;
00009 import java.io.IOException;
00010 import java.io.OutputStreamWriter;
00011 import java.io.Writer;
00012 import java.util.ArrayList;
00013 import java.util.List;
00014 
00015 import org.w3c.dom.Document;
00016 import org.w3c.dom.Node;
00017 import org.w3c.dom.NodeList;
00018 
00019 import com.uem.gsi.cleim.util.Constants;
00020 import com.uem.gsi.cleim.util.MyDOMParser;
00021 
00022 public class MLPXMLToFile {
00023         private String sWebInf="";
00024         private int count=0;
00025   private int countSpanish=0;
00026   private int countEnglish=0;
00027   private List<String> lTopicLang;
00028   private List<String> lID;
00029         private List<String> lTopicName;
00030         private List<String> lUrl;
00031   private List<String> lFullSummary;
00032   private List<String> lGroupNames;
00033   private List<ArrayList<String>> lSynonyms;
00034   private ArrayList<String> lSynonym;
00035         
00036   public MLPXMLToFile(String pWebInf){
00037         sWebInf=pWebInf;
00038                 lTopicLang = new ArrayList<String>();
00039                 lID = new ArrayList<String>();
00040                 lTopicName = new ArrayList<String>();
00041                 lUrl = new ArrayList<String>();
00042                 lFullSummary = new ArrayList<String>();
00043                 lGroupNames = new ArrayList<String>();
00044                 lSynonyms = new ArrayList<ArrayList<String>>();
00045         }
00046         
00047   public void xmlToList(){
00048                 try {
00049                         Document doc;
00050                         //Local
00051                         if (sWebInf!="")
00052                                 doc = MyDOMParser.getDocument(sWebInf+Constants.MLP_XML_VOCAB, 1);
00053                         //Remote
00054                         else
00055                                 doc = MyDOMParser.getDocument(Constants.MLP_XML_VOCAB, 0);
00056                         this.count=new Integer(doc.getElementsByTagName("MedicalTopics").item(0).getAttributes().getNamedItem("total").getNodeValue());
00057                         this.countEnglish=new Integer(doc.getElementsByTagName("MedicalTopics").item(0).getAttributes().getNamedItem("totalEnglish").getNodeValue());
00058                         this.countSpanish=new Integer(doc.getElementsByTagName("MedicalTopics").item(0).getAttributes().getNamedItem("totalSpanish").getNodeValue());
00059                         
00060                         NodeList listRes=doc.getElementsByTagName("MedicalTopic");
00061                         String id="",name="",url="",fullsum="",groups="";
00062                         for (int j=0;j<listRes.getLength();j++){
00063                                 lSynonym = new ArrayList<String>();
00064                     Node annotNode=listRes.item(j);
00065                                 //English or spanish
00066                                 lTopicLang.add(annotNode.getAttributes().getNamedItem("langcode").getNodeValue());
00067                                 NodeList listChildAnnot=annotNode.getChildNodes();
00068                                 for (int i=0;i<listChildAnnot.getLength();i++){
00069                                         Node nodeContent=listChildAnnot.item(i);
00070                             if (nodeContent.getNodeName().equals("ID"))
00071                                 id=nodeContent.getTextContent();
00072                             if (nodeContent.getNodeName().equals("MedicalTopicName"))
00073                                 name=nodeContent.getTextContent();
00074                             if (nodeContent.getNodeName().equals("URL"))
00075                                 url=nodeContent.getTextContent();
00076                             if (nodeContent.getNodeName().equals("FullSummary"))
00077                                 fullsum=nodeContent.getTextContent();
00078                             //Groups names
00079                             if (nodeContent.getNodeName().equals("Groups")){
00080                                 NodeList listGroups=nodeContent.getChildNodes();
00081                                 groups="";
00082                                 for (int k=0;k<listGroups.getLength();k++){
00083                                         if (listGroups.item(k).getNodeType()==1)
00084                                                 groups+=(groups.equals(""))?
00085                                                         listGroups.item(k).getChildNodes().item(3).getTextContent():
00086                                                         " | "+listGroups.item(k).getChildNodes().item(3).getTextContent();
00087                                 }
00088                             }
00089                             //Synonyms
00090                             if (nodeContent.getNodeName().equals("Synonyms")){
00091                                 NodeList listSynonyms=nodeContent.getChildNodes();
00092                                 for (int k=0;k<listSynonyms.getLength();k++){
00093                                         if (listSynonyms.item(k).getNodeType()==1)
00094                                                 lSynonym.add(listSynonyms.item(k).getTextContent());
00095                                 }
00096                             }
00097                                 }
00098                                 lID.add(id);
00099                                 lTopicName.add(name);
00100                                 lUrl.add(url);
00101                                 lFullSummary.add(fullsum);
00102                                 lGroupNames.add(groups);
00103                                 lSynonyms.add(lSynonym);
00104                         }
00105                 } catch (Exception e) {
00106                         System.out.println("Exception reading medlineplus xml: "+e.toString());
00107                 }
00108         }
00109         private void loadListFileLan(String filePath,String lan){
00110                 try {
00111                         Writer out = new BufferedWriter(new OutputStreamWriter(
00112                 new FileOutputStream(filePath), "UTF8"));
00113                         //      Write english data
00114                         for (int i=0;i<lID.size();i++){
00115                                 if (lTopicLang.get(i).equals(lan)){
00116                                         //out.write(lTopicName.get(i)+"&name="+lTopicName.get(i)+"&url="+lUrl.get(i)+"&groups="+lGroupNames.get(i)+"\n");
00117                                         out.write(lTopicName.get(i)+"&url="+lUrl.get(i)+"&groups="+lGroupNames.get(i)+"\n");
00118                                         for (int j=0;j<lSynonyms.get(i).size();j++){
00119                                                 //out.write(lSynonyms.get(i).get(j)+"&name="+lSynonyms.get(i).get(j)+"&url="+lUrl.get(i)+"&groups="+lGroupNames.get(i)+"\n");
00120                                                 out.write(lSynonyms.get(i).get(j)+"&url="+lUrl.get(i)+"&groups="+lGroupNames.get(i)+"\n");
00121                                                 
00122                                         }
00123                                 }
00124                         }
00125             out.close();
00126                 } catch (IOException e) {
00127                         e.printStackTrace();
00128                 }
00129         }
00130 
00131         public static void main(String[] args) {
00132                 try{
00133                         // PUT HERE YOUR ABSOLUTE PATH IF YOU WANT LOCAL PROCESS, KEEP EMPTY IF NOT
00134                         String xmlpath="";
00135                         // PUT HERE YOUR LOCAL WEB-INF PATH TO AUTOMATICALLY UPDATE GATE FILES
00136                         String webinf="WebContent/WEB-INF";
00137                         MLPXMLToFile mlp=new MLPXMLToFile(xmlpath);
00138                         System.out.println("READING XML FILES...");
00139                         mlp.xmlToList();
00140                         System.out.println("Topics number: "+mlp.count+
00141                                         " In English: "+mlp.countEnglish+". In Spanish: "+mlp.countSpanish);
00142                         for(int i=0; i<mlp.lID.size();i++){
00143                                 System.out.println("lang: "+mlp.lTopicLang.get(i));
00144                                 System.out.println("ID: "+mlp.lID.get(i));
00145                                 System.out.println("Name: "+mlp.lTopicName.get(i));
00146                                 System.out.println("Url: "+mlp.lUrl.get(i));
00147                                 System.out.println("FullSummary: "+mlp.lFullSummary.get(i));
00148                                 System.out.println("Groups: "+mlp.lGroupNames.get(i));
00149                                 for (int j=0;j<mlp.lSynonyms.get(i).size();j++)
00150                                         System.out.println("Synonyms: "+mlp.lSynonyms.get(i).get(j));
00151                         }
00152                         System.out.println("LOADING GATE LIST FILE...");
00153                         //TO LOAD DISJOINTED LANGUAGE FILES TO THE TERMS
00154                         mlp.loadListFileLan(webinf+Constants.PATH_MLP_LST_EN, "English");
00155                         mlp.loadListFileLan(webinf+Constants.PATH_MLP_LST_SP, "Spanish");
00156                         //USE THIS TO LOAD ENGLISH AND SPANISH TERMS IN THE SAME LIST
00157                         //mlp.loadListFile(Constants.PATH_MLP_LST);
00158                         System.out.println("DONE");
00159                         
00160                 }catch(Exception e){
00161                         System.out.println("Error: "+e.toString());
00162                 }
00163         }
00164         
00165 }