CLEiM
Cross Lingual Education in Medicine
 All Classes Namespaces Files Functions Variables
IntegrateAnnot.java
Go to the documentation of this file.
00001 
00004 package com.uem.gsi.cleim.nlp;
00005 
00006 import java.io.StringWriter;
00007 import java.util.ArrayList;
00008 import java.util.Arrays;
00009 import java.util.List;
00010 import java.util.Vector;
00011 
00012 import javax.xml.parsers.DocumentBuilder;
00013 import javax.xml.parsers.DocumentBuilderFactory;
00014 import javax.xml.transform.OutputKeys;
00015 import javax.xml.transform.Transformer;
00016 import javax.xml.transform.TransformerFactory;
00017 import javax.xml.transform.dom.DOMSource;
00018 import javax.xml.transform.stream.StreamResult;
00019 
00020 import org.apache.commons.collections.ListUtils;
00021 import org.w3c.dom.Attr;
00022 import org.w3c.dom.Document;
00023 import org.w3c.dom.Element;
00024 import org.w3c.dom.Node;
00025 
00026 import com.uem.gsi.cleim.util.Constants;
00027 import com.uem.gsi.cleim.util.ReplaceDefFile;
00028 
00033 public class IntegrateAnnot {
00034         private String text="";
00035         private String gatePath="";
00036         private String onts="";
00037         private String lev="";
00038         private String[] localSrc=new String[3];
00039         private String[] localLan=new String[2];
00040         private Document document = null;
00041         private Element root = null;
00042         public IntegrateAnnot(String ptext,String pgatePath,String ponts, 
00043                         String plev, String[] plocalSrc, String[] plocalLan) {
00044                 text=ptext;
00045                 gatePath=pgatePath;
00046                 onts=ponts;
00047                 lev=plev;
00048                 localSrc=plocalSrc;
00049                 localLan=plocalLan;
00050         }
00051         public GateGazMed gateWithoutFormat() throws Exception {
00052                 //long start = System.currentTimeMillis();
00053                 GateGazMed gateAnnot=new GateGazMed(gatePath);
00054                 String sGapp="CLEiM.gapp";
00055                 if (localSrc[0].equals("") || localSrc[1].equals("") || localSrc[2].equals("")
00056                                 || localLan[0].equals("") || localLan[1].equals("")){
00057                         ReplaceDefFile rdf=new ReplaceDefFile();
00058                         rdf.saveDefFile(localSrc, localLan, gatePath+Constants.PATH_DEF_TMP);
00059                         sGapp="CLEiMtmp.gapp";
00060                 }
00061                 gateAnnot.runGazetteer(text,sGapp);
00062                 //long end = System.currentTimeMillis();
00063                 //System.out.println("GATE Without format: "+ (end-start));
00064                 return gateAnnot;
00065         }
00066         public NCBOAnnot ncboWithoutFormat() throws Exception {
00067                 //long start = System.currentTimeMillis();
00068                 NCBOAnnot ncboAnnot=new NCBOAnnot();
00069                 ncboAnnot.runNCBOAnnot(text, onts, lev);
00070                 //long end = System.currentTimeMillis();
00071                 //System.out.println("NCBO Without format: "+ (end-start));
00072                 return ncboAnnot;
00073         }
00074         public String htmlGateTree() throws Exception {
00075                 String result="";
00076                 GateGazMed gateAnnot=gateWithoutFormat();
00077                 Vector<String> vMinor=gateAnnot.getMinor();
00078                 Vector<List<String>> vAnnot=gateAnnot.getAnnot();
00079                 Vector<List<String>> vAnnotLan=gateAnnot.getAnnotLan();
00080                 Vector<List<String>> vAnnotSource=gateAnnot.getAnnotSource();
00081                 Vector<List<String>> vAnnotGroup=gateAnnot.getAnnotGroup();
00082                 Vector<List<String>> vAnnotFrom=gateAnnot.getAnnotFrom();
00083                 Vector<List<String>> vAnnotTo=gateAnnot.getAnnotTo();
00084                 Vector<List<String>> vAnnotUrl=gateAnnot.getAnnotUrl();
00085                 for (int i=0;i<vMinor.size();i++){
00086                         //Repeated
00087                         List<Integer> lNoRepeated=remRepeated(vAnnot.get(i));
00088                         //System.out.println("Minor: "+vMinor.get(i));
00089                         //for (int j=0;j<vAnnot.get(i).size();j++){
00090                         for (int k=0;k<lNoRepeated.size();k++){
00091                                 int j=lNoRepeated.get(k);
00092                                 result+=insertHtmlItem(vAnnot.get(i).get(j),vAnnotLan.get(i).get(j),vAnnotSource.get(i).get(j),
00093                                                 vAnnotGroup.get(i).get(j), vAnnotFrom.get(i).get(j),vAnnotTo.get(i).get(j),
00094                                                 vAnnotUrl.get(i).get(j),vMinor.get(i)+".jsp?term="+vAnnot.get(i).get(j));
00095                         }
00096                 }
00097                 return result;
00098         }
00099         public String htmlNCBOTree() throws Exception {
00100                 String result="";
00101                 NCBOAnnot ncboAnnot=ncboWithoutFormat();
00102                 List<String> lConcept=ncboAnnot.getConcept();
00103                 List<String> lGroup=ncboAnnot.getGroup();
00104                 List<String> lLocalOntologyId=ncboAnnot.getLocalOntologyId();
00105                 List<String> lFullId=ncboAnnot.getFullId();
00106                 List<String> lIsDirect=ncboAnnot.getIsDirect();
00107                 List<String> lFrom=ncboAnnot.getFrom();
00108                 List<String> lTo=ncboAnnot.getTo();
00109                 List<String> lPreferredName=ncboAnnot.getPreferredName();
00110                 List<String> lContextName=ncboAnnot.getContextName();
00111                 //Add value just if it is not repeated
00112                 List<Integer> lNoRepeated;
00113                 if (lev.equals("0")){
00114                         List<String> ltemp=new ArrayList<String>();
00115                         for (int j=0;j<lConcept.size();j++)
00116                                 ltemp.add(lConcept.get(j)+" "+lLocalOntologyId.get(j));
00117                         lNoRepeated=remRepeated(ltemp);
00118                         //Paint results         
00119                         for (int k=0;k<lNoRepeated.size();k++){
00120                                 int i=lNoRepeated.get(k);
00121                                 //source, localUrl, url
00122                                 String[] data=compoundNCBOData(lConcept.get(i), lLocalOntologyId.get(i),lFullId.get(i));
00123                                 result+=insertHtmlItem(lConcept.get(i),"en",data[0],lGroup.get(i),
00124                                                 lFrom.get(i),lTo.get(i),data[2],data[1]);
00125                         }
00126                 }else{
00127                         List<String> ltemp=new ArrayList<String>();
00128                         for (int j=0;j<lPreferredName.size();j++)
00129                                 ltemp.add(lPreferredName.get(j)+" "+lLocalOntologyId.get(j));
00130                         lNoRepeated=remRepeated(ltemp);
00131                         //Paint results         
00132                         for (int k=0;k<lNoRepeated.size();k++){
00133                                 int i=lNoRepeated.get(k);
00134                                 String[] data=compoundNCBOData(lPreferredName.get(i), lLocalOntologyId.get(i),lFullId.get(i));
00135                                 if (lIsDirect.get(i).equals("true"))
00136                                         result+=insertHtmlItem(lPreferredName.get(i)+" <- "+lConcept.get(i),
00137                                                         "en",data[0],lGroup.get(i),lFrom.get(i),lTo.get(i),data[2],data[1]);
00138                                 else
00139                                         result+=insertHtmlItem(lPreferredName.get(i)+" -> "+lConcept.get(i),
00140                                                         "en",data[0],lGroup.get(i),lFrom.get(i),lTo.get(i),data[2],data[1]);
00141         
00142                         }
00143                 }
00144                 return result;
00145         }
00146         private String insertHtmlItem(String name, String lan, String src, 
00147                         String group,   String from, String to, String url, String localUrl) {
00148                 String result="<tr>";
00149                 result+="<td>"+lan+"</td>";
00150                 result+="<td>";
00151                 //if (src.equals("Snomed")||(src.equals("Medlineplus")&&lan.equals("sp")))
00152                 if (lan.equals("sp"))
00153                         result+=name;
00154                 else
00155                         result+="<a href='"+localUrl+"' target=_blank>"+name+"</a>";
00156                 result+="</td>";
00157                 result+="<td>"+src+"</td>";
00158                 result+="<td>"+group+"</td>";
00159                 //result+="<td>"+from+"</td>";
00160                 //result+="<td>"+to+"</td>";
00161                 result+="<td>";
00162                 if (src.contains("Snomed")){
00163                         result+="<a href='"+Constants.MLP_CONNECT_SNOMED+"&"+
00164                         Constants.MLP_CONNECT_TERM_PARAM+"="+url+"&"+
00165                         Constants.MLP_CONNECT_LAN_PARAM+"=en"+
00166                         "' target=_blank>Go to MP Connect ("+src+")</a>";
00167                         result+=" | <a href='"+Constants.MLP_CONNECT_SNOMED+"&"+
00168                         Constants.MLP_CONNECT_TERM_PARAM+"="+url+"&"+
00169                         Constants.MLP_CONNECT_LAN_PARAM+"=sp"+
00170                         "' target=_blank>Ir a MP Connect ("+src+")</a>";
00171                 }else if (src.equals("MedlinePlus")){
00172                         if (lan.equals("en")){
00173                                 result+="<a href='"+url+"' target=_blank>Go to "+src+"</a>";
00174                                 String urlsp=url.replaceFirst("([a-z]*?\\.html)", "spanish/$0");
00175                                 result+=" | <a href='"+urlsp+"' target=_blank>Ir a "+src+"</a>";
00176                         }else{
00177                                 result+="<a href='"+url.replace("/spanish", "")+"' target=_blank>Go to "+src+"</a>";
00178                                 result+=" | <a href='"+url+"' target=_blank>Ir a "+src+"</a>";
00179                         }
00180                 }else
00181                         result+="<a href='"+url+"' target=_blank>Go to "+src;
00182                 
00183                 result+="</td>";
00184                 result+="</tr>";
00185                 return result;
00186   }
00187         
00188         public void xmlGateTree() throws Exception {
00189                 GateGazMed gateAnnot=gateWithoutFormat();
00190                 Vector<String> vMinor=gateAnnot.getMinor();
00191                 Vector<List<String>> vAnnot=gateAnnot.getAnnot();
00192                 Vector<List<String>> vAnnotLan=gateAnnot.getAnnotLan();
00193                 Vector<List<String>> vAnnotSource=gateAnnot.getAnnotSource();
00194                 Vector<List<String>> vAnnotGroup=gateAnnot.getAnnotGroup();
00195                 Vector<List<String>> vAnnotFrom=gateAnnot.getAnnotFrom();
00196                 Vector<List<String>> vAnnotTo=gateAnnot.getAnnotTo();
00197                 Vector<List<String>> vAnnotUrl=gateAnnot.getAnnotUrl();
00198                 for (int i=0;i<vMinor.size();i++){
00199                         for (int j=0;j<vAnnot.get(i).size();j++){
00200                                 insertXmlItem(vAnnotSource.get(i).get(j),vAnnot.get(i).get(j),
00201                                         vAnnot.get(i).get(j),"true",vAnnotGroup.get(i).get(j),vAnnotLan.get(i).get(j),
00202                                         vAnnotFrom.get(i).get(j),vAnnotTo.get(i).get(j),vAnnotUrl.get(i).get(j),
00203                                         vMinor.get(i)+".jsp?term="+vAnnot.get(i).get(j));
00204                         }
00205                 }
00206                 
00207         }
00208         public void xmlNCBOTree() throws Exception {
00209                 NCBOAnnot ncboAnnot=ncboWithoutFormat();
00210                 List<String> lConcept=ncboAnnot.getConcept();
00211                 List<String> lGroup=ncboAnnot.getGroup();
00212                 List<String> lLocalOntologyId=ncboAnnot.getLocalOntologyId();
00213                 List<String> lFullId=ncboAnnot.getFullId();
00214                 List<String> lIsDirect=ncboAnnot.getIsDirect();
00215                 List<String> lFrom=ncboAnnot.getFrom();
00216                 List<String> lTo=ncboAnnot.getTo();
00217                 List<String> lPreferredName=ncboAnnot.getPreferredName();
00218                 int size=lConcept.size();
00219                 for (int i=0;i<size;i++){
00220                         //source, localUrl, url
00221                         String[] data=compoundNCBOData(lPreferredName.get(i), lLocalOntologyId.get(i), 
00222                                         lFullId.get(i));
00223                         
00224                         //String ont=(lLocalOntologyId.get(i).equals("40397"))?"mlp":lLocalOntologyId.get(i);
00225                         insertXmlItem(data[0],lConcept.get(i),lPreferredName.get(i),lIsDirect.get(i),
00226                                         lGroup.get(i),"en",lFrom.get(i),lTo.get(i),data[2],data[1]);
00227                 }
00228         }
00229         
00230         private void insertXmlItem(String annotType, String concept, String preferred,
00231                         String direct, String groups, String lan, String from, String to,       String url, 
00232                         String localUrl) {
00233     // Insert child Item annotType
00234     Node itemChild = document.createElement("annotation");
00235     root.appendChild(itemChild);
00236     //Source attribute
00237     Attr source = document.createAttribute("source");
00238     source.setValue(annotType);
00239     itemChild.getAttributes().setNamedItem(source);
00240     //Language attribute
00241     Attr language = document.createAttribute("language");
00242     language.setValue(lan);
00243     itemChild.getAttributes().setNamedItem(language);
00244     // Insert child from with text
00245     Node item = document.createElement("concept");
00246     itemChild.appendChild(item);
00247     Attr neg = document.createAttribute("neg");
00248     neg.setValue("0");
00249     item.getAttributes().setNamedItem(neg);
00250     Node value = document.createTextNode(concept);
00251     item.appendChild(value);
00252     //document.createAttribute("neg").setValue("0");
00253     // Insert child from with text
00254     item = document.createElement("from");
00255     itemChild.appendChild(item);
00256     value = document.createTextNode(from);
00257     item.appendChild(value);
00258     // Insert child to with text
00259     item = document.createElement("to");
00260     itemChild.appendChild(item);
00261     value = document.createTextNode(to);
00262     item.appendChild(value);
00263     // Insert child preferred with text
00264     item = document.createElement("preferred");
00265     itemChild.appendChild(item);
00266     Attr dir = document.createAttribute("direct");
00267     dir.setValue(direct);
00268     item.getAttributes().setNamedItem(dir);
00269     value = document.createTextNode(preferred);
00270     item.appendChild(value);
00271     // Insert child local url with text
00272     item = document.createElement("localurl");
00273     itemChild.appendChild(item);
00274     value = document.createTextNode(localUrl);
00275     item.appendChild(value);
00276     // External URLs
00277     String urlen=url;
00278     String urlsp="";
00279     if (annotType.contains("Snomed")){
00280         String urltemp=Constants.MLP_CONNECT_SNOMED+"&"+
00281                 Constants.MLP_CONNECT_TERM_PARAM+"="+url+"&"+Constants.MLP_CONNECT_LAN_PARAM;
00282                         urlen=urltemp+"=en";
00283                         urlsp=urltemp+"=sp";
00284                 }else if (annotType.equals("MedlinePlus")){
00285                         if (lan.equals("en")){
00286                                 urlsp=url.replaceFirst("([a-z]*?\\.html)", "spanish/$0");
00287                         }else{
00288                                 urlen=url.replace("/spanish", "");
00289                                 urlsp=url;
00290                         }
00291                 }
00292     // Insert child urlen with text
00293     item = document.createElement("urlen");
00294     itemChild.appendChild(item);
00295     value = document.createTextNode(urlen);
00296     item.appendChild(value);
00297     // Insert child urlsp with text
00298     item = document.createElement("urlsp");
00299     itemChild.appendChild(item);
00300     value = document.createTextNode(urlsp);
00301     item.appendChild(value);
00302     
00303     // Insert child groups with text
00304     item = document.createElement("groups");
00305     itemChild.appendChild(item);
00306     value = document.createTextNode(groups);
00307     item.appendChild(value);
00308   }
00309         public void initXmlDocument() throws Exception {
00310                 document = null;
00311                 DocumentBuilder builder = null;
00312                 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance();
00313                 builder = factory.newDocumentBuilder();
00314                 document = builder.newDocument();
00315                 
00316     // Root tmt results
00317                 root = (Element) document.createElement("cleim");
00318     document.appendChild(root);
00319     // Insert input fields
00320     Node itemInput = document.createElement("input");
00321     root.appendChild(itemInput);
00322     // -> Text
00323     Node item = document.createElement("text");
00324     itemInput.appendChild(item);
00325     Node value = document.createTextNode(text);
00326     item.appendChild(value);
00327     // -> onts
00328     item = document.createElement("remoteonts");
00329     itemInput.appendChild(item);
00330     value = document.createTextNode(onts);
00331     item.appendChild(value);
00332     // -> lev
00333     item = document.createElement("lev");
00334     itemInput.appendChild(item);
00335     value = document.createTextNode(lev);
00336     item.appendChild(value);
00337     // -> localSrc
00338     item = document.createElement("localSrc");
00339     itemInput.appendChild(item);
00340     value = document.createTextNode(Arrays.toString(localSrc));
00341     item.appendChild(value);
00342     // -> localLan
00343     item = document.createElement("localLan");
00344     itemInput.appendChild(item);
00345     value = document.createTextNode(Arrays.toString(localLan));
00346     item.appendChild(value);
00347     
00348         }
00349         public String getNormalizedXml() throws Exception {
00350                 // Normalizing the DOM
00351     document.getDocumentElement().normalize();
00352                 Transformer transformer = TransformerFactory.newInstance().newTransformer();
00353                 transformer.setOutputProperty(OutputKeys.INDENT, "yes");
00354                 StreamResult result = new StreamResult(new StringWriter());
00355                 DOMSource source = new DOMSource(document);
00356                 transformer.transform(source, result);
00357                 return result.getWriter().toString();
00358         }
00359         private String[] compoundNCBOData(String concept,String localOnt,String fullId){
00360                 String source="OBA";
00361                 String localUrl="medlineplus.jsp?term="+concept;
00362                 String url=fullId;
00363                 if (localOnt.equals("40397")){
00364                         source+=" Medlineplus";
00365                         //localUrl="medlineplus.jsp?term="+concept;
00366                         //url=fullId;
00367           }else if (localOnt.equals("46116")){
00368                         source+=" Snomed";
00369                         //localUrl="medlineplus.jsp?term="+concept;
00370                         url=fullId.replaceAll(".*/", "");
00371           }
00372                 source+=" ("+localOnt+")";
00373                 return new String[] {source,localUrl,url};
00374         }
00375         private List<Integer> remRepeated(List<String> lOrig){
00376                 int size=lOrig.size();
00377                 List<Integer> lRes=new ArrayList<Integer>();
00378                 List<String> lResTemp=new ArrayList<String>();
00379                 for (int i=0;i<size;i++){
00380                         if (!lResTemp.contains(lOrig.get(i))){
00381                                 lRes.add(new Integer(i));
00382                                 lResTemp.add(lOrig.get(i));
00383                         }
00384                 }
00385                 return lRes;
00386         }
00387 }