CLEiM
Cross Lingual Education in Medicine
|
00001 00004 package com.uem.gsi.cleim.nlp; 00005 00006 import java.io.StringWriter; 00007 import java.util.ArrayList; 00008 import java.util.Arrays; 00009 import java.util.List; 00010 import java.util.Vector; 00011 00012 import javax.xml.parsers.DocumentBuilder; 00013 import javax.xml.parsers.DocumentBuilderFactory; 00014 import javax.xml.transform.OutputKeys; 00015 import javax.xml.transform.Transformer; 00016 import javax.xml.transform.TransformerFactory; 00017 import javax.xml.transform.dom.DOMSource; 00018 import javax.xml.transform.stream.StreamResult; 00019 00020 import org.apache.commons.collections.ListUtils; 00021 import org.w3c.dom.Attr; 00022 import org.w3c.dom.Document; 00023 import org.w3c.dom.Element; 00024 import org.w3c.dom.Node; 00025 00026 import com.uem.gsi.cleim.util.Constants; 00027 import com.uem.gsi.cleim.util.ReplaceDefFile; 00028 00033 public class IntegrateAnnot { 00034 private String text=""; 00035 private String gatePath=""; 00036 private String onts=""; 00037 private String lev=""; 00038 private String[] localSrc=new String[3]; 00039 private String[] localLan=new String[2]; 00040 private Document document = null; 00041 private Element root = null; 00042 public IntegrateAnnot(String ptext,String pgatePath,String ponts, 00043 String plev, String[] plocalSrc, String[] plocalLan) { 00044 text=ptext; 00045 gatePath=pgatePath; 00046 onts=ponts; 00047 lev=plev; 00048 localSrc=plocalSrc; 00049 localLan=plocalLan; 00050 } 00051 public GateGazMed gateWithoutFormat() throws Exception { 00052 //long start = System.currentTimeMillis(); 00053 GateGazMed gateAnnot=new GateGazMed(gatePath); 00054 String sGapp="CLEiM.gapp"; 00055 if (localSrc[0].equals("") || localSrc[1].equals("") || localSrc[2].equals("") 00056 || localLan[0].equals("") || localLan[1].equals("")){ 00057 ReplaceDefFile rdf=new ReplaceDefFile(); 00058 rdf.saveDefFile(localSrc, localLan, gatePath+Constants.PATH_DEF_TMP); 00059 sGapp="CLEiMtmp.gapp"; 00060 } 00061 gateAnnot.runGazetteer(text,sGapp); 00062 //long end = System.currentTimeMillis(); 00063 //System.out.println("GATE Without format: "+ (end-start)); 00064 return gateAnnot; 00065 } 00066 public NCBOAnnot ncboWithoutFormat() throws Exception { 00067 //long start = System.currentTimeMillis(); 00068 NCBOAnnot ncboAnnot=new NCBOAnnot(); 00069 ncboAnnot.runNCBOAnnot(text, onts, lev); 00070 //long end = System.currentTimeMillis(); 00071 //System.out.println("NCBO Without format: "+ (end-start)); 00072 return ncboAnnot; 00073 } 00074 public String htmlGateTree() throws Exception { 00075 String result=""; 00076 GateGazMed gateAnnot=gateWithoutFormat(); 00077 Vector<String> vMinor=gateAnnot.getMinor(); 00078 Vector<List<String>> vAnnot=gateAnnot.getAnnot(); 00079 Vector<List<String>> vAnnotLan=gateAnnot.getAnnotLan(); 00080 Vector<List<String>> vAnnotSource=gateAnnot.getAnnotSource(); 00081 Vector<List<String>> vAnnotGroup=gateAnnot.getAnnotGroup(); 00082 Vector<List<String>> vAnnotFrom=gateAnnot.getAnnotFrom(); 00083 Vector<List<String>> vAnnotTo=gateAnnot.getAnnotTo(); 00084 Vector<List<String>> vAnnotUrl=gateAnnot.getAnnotUrl(); 00085 for (int i=0;i<vMinor.size();i++){ 00086 //Repeated 00087 List<Integer> lNoRepeated=remRepeated(vAnnot.get(i)); 00088 //System.out.println("Minor: "+vMinor.get(i)); 00089 //for (int j=0;j<vAnnot.get(i).size();j++){ 00090 for (int k=0;k<lNoRepeated.size();k++){ 00091 int j=lNoRepeated.get(k); 00092 result+=insertHtmlItem(vAnnot.get(i).get(j),vAnnotLan.get(i).get(j),vAnnotSource.get(i).get(j), 00093 vAnnotGroup.get(i).get(j), vAnnotFrom.get(i).get(j),vAnnotTo.get(i).get(j), 00094 vAnnotUrl.get(i).get(j),vMinor.get(i)+".jsp?term="+vAnnot.get(i).get(j)); 00095 } 00096 } 00097 return result; 00098 } 00099 public String htmlNCBOTree() throws Exception { 00100 String result=""; 00101 NCBOAnnot ncboAnnot=ncboWithoutFormat(); 00102 List<String> lConcept=ncboAnnot.getConcept(); 00103 List<String> lGroup=ncboAnnot.getGroup(); 00104 List<String> lLocalOntologyId=ncboAnnot.getLocalOntologyId(); 00105 List<String> lFullId=ncboAnnot.getFullId(); 00106 List<String> lIsDirect=ncboAnnot.getIsDirect(); 00107 List<String> lFrom=ncboAnnot.getFrom(); 00108 List<String> lTo=ncboAnnot.getTo(); 00109 List<String> lPreferredName=ncboAnnot.getPreferredName(); 00110 List<String> lContextName=ncboAnnot.getContextName(); 00111 //Add value just if it is not repeated 00112 List<Integer> lNoRepeated; 00113 if (lev.equals("0")){ 00114 List<String> ltemp=new ArrayList<String>(); 00115 for (int j=0;j<lConcept.size();j++) 00116 ltemp.add(lConcept.get(j)+" "+lLocalOntologyId.get(j)); 00117 lNoRepeated=remRepeated(ltemp); 00118 //Paint results 00119 for (int k=0;k<lNoRepeated.size();k++){ 00120 int i=lNoRepeated.get(k); 00121 //source, localUrl, url 00122 String[] data=compoundNCBOData(lConcept.get(i), lLocalOntologyId.get(i),lFullId.get(i)); 00123 result+=insertHtmlItem(lConcept.get(i),"en",data[0],lGroup.get(i), 00124 lFrom.get(i),lTo.get(i),data[2],data[1]); 00125 } 00126 }else{ 00127 List<String> ltemp=new ArrayList<String>(); 00128 for (int j=0;j<lPreferredName.size();j++) 00129 ltemp.add(lPreferredName.get(j)+" "+lLocalOntologyId.get(j)); 00130 lNoRepeated=remRepeated(ltemp); 00131 //Paint results 00132 for (int k=0;k<lNoRepeated.size();k++){ 00133 int i=lNoRepeated.get(k); 00134 String[] data=compoundNCBOData(lPreferredName.get(i), lLocalOntologyId.get(i),lFullId.get(i)); 00135 if (lIsDirect.get(i).equals("true")) 00136 result+=insertHtmlItem(lPreferredName.get(i)+" <- "+lConcept.get(i), 00137 "en",data[0],lGroup.get(i),lFrom.get(i),lTo.get(i),data[2],data[1]); 00138 else 00139 result+=insertHtmlItem(lPreferredName.get(i)+" -> "+lConcept.get(i), 00140 "en",data[0],lGroup.get(i),lFrom.get(i),lTo.get(i),data[2],data[1]); 00141 00142 } 00143 } 00144 return result; 00145 } 00146 private String insertHtmlItem(String name, String lan, String src, 00147 String group, String from, String to, String url, String localUrl) { 00148 String result="<tr>"; 00149 result+="<td>"+lan+"</td>"; 00150 result+="<td>"; 00151 //if (src.equals("Snomed")||(src.equals("Medlineplus")&&lan.equals("sp"))) 00152 if (lan.equals("sp")) 00153 result+=name; 00154 else 00155 result+="<a href='"+localUrl+"' target=_blank>"+name+"</a>"; 00156 result+="</td>"; 00157 result+="<td>"+src+"</td>"; 00158 result+="<td>"+group+"</td>"; 00159 //result+="<td>"+from+"</td>"; 00160 //result+="<td>"+to+"</td>"; 00161 result+="<td>"; 00162 if (src.contains("Snomed")){ 00163 result+="<a href='"+Constants.MLP_CONNECT_SNOMED+"&"+ 00164 Constants.MLP_CONNECT_TERM_PARAM+"="+url+"&"+ 00165 Constants.MLP_CONNECT_LAN_PARAM+"=en"+ 00166 "' target=_blank>Go to MP Connect ("+src+")</a>"; 00167 result+=" | <a href='"+Constants.MLP_CONNECT_SNOMED+"&"+ 00168 Constants.MLP_CONNECT_TERM_PARAM+"="+url+"&"+ 00169 Constants.MLP_CONNECT_LAN_PARAM+"=sp"+ 00170 "' target=_blank>Ir a MP Connect ("+src+")</a>"; 00171 }else if (src.equals("MedlinePlus")){ 00172 if (lan.equals("en")){ 00173 result+="<a href='"+url+"' target=_blank>Go to "+src+"</a>"; 00174 String urlsp=url.replaceFirst("([a-z]*?\\.html)", "spanish/$0"); 00175 result+=" | <a href='"+urlsp+"' target=_blank>Ir a "+src+"</a>"; 00176 }else{ 00177 result+="<a href='"+url.replace("/spanish", "")+"' target=_blank>Go to "+src+"</a>"; 00178 result+=" | <a href='"+url+"' target=_blank>Ir a "+src+"</a>"; 00179 } 00180 }else 00181 result+="<a href='"+url+"' target=_blank>Go to "+src; 00182 00183 result+="</td>"; 00184 result+="</tr>"; 00185 return result; 00186 } 00187 00188 public void xmlGateTree() throws Exception { 00189 GateGazMed gateAnnot=gateWithoutFormat(); 00190 Vector<String> vMinor=gateAnnot.getMinor(); 00191 Vector<List<String>> vAnnot=gateAnnot.getAnnot(); 00192 Vector<List<String>> vAnnotLan=gateAnnot.getAnnotLan(); 00193 Vector<List<String>> vAnnotSource=gateAnnot.getAnnotSource(); 00194 Vector<List<String>> vAnnotGroup=gateAnnot.getAnnotGroup(); 00195 Vector<List<String>> vAnnotFrom=gateAnnot.getAnnotFrom(); 00196 Vector<List<String>> vAnnotTo=gateAnnot.getAnnotTo(); 00197 Vector<List<String>> vAnnotUrl=gateAnnot.getAnnotUrl(); 00198 for (int i=0;i<vMinor.size();i++){ 00199 for (int j=0;j<vAnnot.get(i).size();j++){ 00200 insertXmlItem(vAnnotSource.get(i).get(j),vAnnot.get(i).get(j), 00201 vAnnot.get(i).get(j),"true",vAnnotGroup.get(i).get(j),vAnnotLan.get(i).get(j), 00202 vAnnotFrom.get(i).get(j),vAnnotTo.get(i).get(j),vAnnotUrl.get(i).get(j), 00203 vMinor.get(i)+".jsp?term="+vAnnot.get(i).get(j)); 00204 } 00205 } 00206 00207 } 00208 public void xmlNCBOTree() throws Exception { 00209 NCBOAnnot ncboAnnot=ncboWithoutFormat(); 00210 List<String> lConcept=ncboAnnot.getConcept(); 00211 List<String> lGroup=ncboAnnot.getGroup(); 00212 List<String> lLocalOntologyId=ncboAnnot.getLocalOntologyId(); 00213 List<String> lFullId=ncboAnnot.getFullId(); 00214 List<String> lIsDirect=ncboAnnot.getIsDirect(); 00215 List<String> lFrom=ncboAnnot.getFrom(); 00216 List<String> lTo=ncboAnnot.getTo(); 00217 List<String> lPreferredName=ncboAnnot.getPreferredName(); 00218 int size=lConcept.size(); 00219 for (int i=0;i<size;i++){ 00220 //source, localUrl, url 00221 String[] data=compoundNCBOData(lPreferredName.get(i), lLocalOntologyId.get(i), 00222 lFullId.get(i)); 00223 00224 //String ont=(lLocalOntologyId.get(i).equals("40397"))?"mlp":lLocalOntologyId.get(i); 00225 insertXmlItem(data[0],lConcept.get(i),lPreferredName.get(i),lIsDirect.get(i), 00226 lGroup.get(i),"en",lFrom.get(i),lTo.get(i),data[2],data[1]); 00227 } 00228 } 00229 00230 private void insertXmlItem(String annotType, String concept, String preferred, 00231 String direct, String groups, String lan, String from, String to, String url, 00232 String localUrl) { 00233 // Insert child Item annotType 00234 Node itemChild = document.createElement("annotation"); 00235 root.appendChild(itemChild); 00236 //Source attribute 00237 Attr source = document.createAttribute("source"); 00238 source.setValue(annotType); 00239 itemChild.getAttributes().setNamedItem(source); 00240 //Language attribute 00241 Attr language = document.createAttribute("language"); 00242 language.setValue(lan); 00243 itemChild.getAttributes().setNamedItem(language); 00244 // Insert child from with text 00245 Node item = document.createElement("concept"); 00246 itemChild.appendChild(item); 00247 Attr neg = document.createAttribute("neg"); 00248 neg.setValue("0"); 00249 item.getAttributes().setNamedItem(neg); 00250 Node value = document.createTextNode(concept); 00251 item.appendChild(value); 00252 //document.createAttribute("neg").setValue("0"); 00253 // Insert child from with text 00254 item = document.createElement("from"); 00255 itemChild.appendChild(item); 00256 value = document.createTextNode(from); 00257 item.appendChild(value); 00258 // Insert child to with text 00259 item = document.createElement("to"); 00260 itemChild.appendChild(item); 00261 value = document.createTextNode(to); 00262 item.appendChild(value); 00263 // Insert child preferred with text 00264 item = document.createElement("preferred"); 00265 itemChild.appendChild(item); 00266 Attr dir = document.createAttribute("direct"); 00267 dir.setValue(direct); 00268 item.getAttributes().setNamedItem(dir); 00269 value = document.createTextNode(preferred); 00270 item.appendChild(value); 00271 // Insert child local url with text 00272 item = document.createElement("localurl"); 00273 itemChild.appendChild(item); 00274 value = document.createTextNode(localUrl); 00275 item.appendChild(value); 00276 // External URLs 00277 String urlen=url; 00278 String urlsp=""; 00279 if (annotType.contains("Snomed")){ 00280 String urltemp=Constants.MLP_CONNECT_SNOMED+"&"+ 00281 Constants.MLP_CONNECT_TERM_PARAM+"="+url+"&"+Constants.MLP_CONNECT_LAN_PARAM; 00282 urlen=urltemp+"=en"; 00283 urlsp=urltemp+"=sp"; 00284 }else if (annotType.equals("MedlinePlus")){ 00285 if (lan.equals("en")){ 00286 urlsp=url.replaceFirst("([a-z]*?\\.html)", "spanish/$0"); 00287 }else{ 00288 urlen=url.replace("/spanish", ""); 00289 urlsp=url; 00290 } 00291 } 00292 // Insert child urlen with text 00293 item = document.createElement("urlen"); 00294 itemChild.appendChild(item); 00295 value = document.createTextNode(urlen); 00296 item.appendChild(value); 00297 // Insert child urlsp with text 00298 item = document.createElement("urlsp"); 00299 itemChild.appendChild(item); 00300 value = document.createTextNode(urlsp); 00301 item.appendChild(value); 00302 00303 // Insert child groups with text 00304 item = document.createElement("groups"); 00305 itemChild.appendChild(item); 00306 value = document.createTextNode(groups); 00307 item.appendChild(value); 00308 } 00309 public void initXmlDocument() throws Exception { 00310 document = null; 00311 DocumentBuilder builder = null; 00312 DocumentBuilderFactory factory = DocumentBuilderFactory.newInstance(); 00313 builder = factory.newDocumentBuilder(); 00314 document = builder.newDocument(); 00315 00316 // Root tmt results 00317 root = (Element) document.createElement("cleim"); 00318 document.appendChild(root); 00319 // Insert input fields 00320 Node itemInput = document.createElement("input"); 00321 root.appendChild(itemInput); 00322 // -> Text 00323 Node item = document.createElement("text"); 00324 itemInput.appendChild(item); 00325 Node value = document.createTextNode(text); 00326 item.appendChild(value); 00327 // -> onts 00328 item = document.createElement("remoteonts"); 00329 itemInput.appendChild(item); 00330 value = document.createTextNode(onts); 00331 item.appendChild(value); 00332 // -> lev 00333 item = document.createElement("lev"); 00334 itemInput.appendChild(item); 00335 value = document.createTextNode(lev); 00336 item.appendChild(value); 00337 // -> localSrc 00338 item = document.createElement("localSrc"); 00339 itemInput.appendChild(item); 00340 value = document.createTextNode(Arrays.toString(localSrc)); 00341 item.appendChild(value); 00342 // -> localLan 00343 item = document.createElement("localLan"); 00344 itemInput.appendChild(item); 00345 value = document.createTextNode(Arrays.toString(localLan)); 00346 item.appendChild(value); 00347 00348 } 00349 public String getNormalizedXml() throws Exception { 00350 // Normalizing the DOM 00351 document.getDocumentElement().normalize(); 00352 Transformer transformer = TransformerFactory.newInstance().newTransformer(); 00353 transformer.setOutputProperty(OutputKeys.INDENT, "yes"); 00354 StreamResult result = new StreamResult(new StringWriter()); 00355 DOMSource source = new DOMSource(document); 00356 transformer.transform(source, result); 00357 return result.getWriter().toString(); 00358 } 00359 private String[] compoundNCBOData(String concept,String localOnt,String fullId){ 00360 String source="OBA"; 00361 String localUrl="medlineplus.jsp?term="+concept; 00362 String url=fullId; 00363 if (localOnt.equals("40397")){ 00364 source+=" Medlineplus"; 00365 //localUrl="medlineplus.jsp?term="+concept; 00366 //url=fullId; 00367 }else if (localOnt.equals("46116")){ 00368 source+=" Snomed"; 00369 //localUrl="medlineplus.jsp?term="+concept; 00370 url=fullId.replaceAll(".*/", ""); 00371 } 00372 source+=" ("+localOnt+")"; 00373 return new String[] {source,localUrl,url}; 00374 } 00375 private List<Integer> remRepeated(List<String> lOrig){ 00376 int size=lOrig.size(); 00377 List<Integer> lRes=new ArrayList<Integer>(); 00378 List<String> lResTemp=new ArrayList<String>(); 00379 for (int i=0;i<size;i++){ 00380 if (!lResTemp.contains(lOrig.get(i))){ 00381 lRes.add(new Integer(i)); 00382 lResTemp.add(lOrig.get(i)); 00383 } 00384 } 00385 return lRes; 00386 } 00387 }