CLEiM
Cross Lingual Education in Medicine
 All Classes Namespaces Files Functions Variables
GateGazMed.java
Go to the documentation of this file.
00001 
00005 package com.uem.gsi.cleim.nlp;
00006 
00007 import gate.Annotation;
00008 import gate.Document;
00009 import gate.Corpus;
00010 import gate.CorpusController;
00011 import gate.AnnotationSet;
00012 import gate.FeatureMap;
00013 import gate.Gate;
00014 import gate.Factory;
00015 import gate.util.*;
00016 import gate.util.persistence.PersistenceManager;
00017 
00018 import java.util.List;
00019 import java.util.ArrayList;
00020 import java.util.Iterator;
00021 import java.util.Vector;
00022 
00023 import java.io.File;
00024 import java.io.IOException;
00025 
00026 public class GateGazMed {
00031   private String xmlGate="";
00032   private String sWebInf="";
00033         private List<String> annotTypesToWrite;
00034   private Vector<String> vAnnotListMinor;
00035   private Vector<List<String>> vAnnotList;
00036   private Vector<List<String>> vAnnotListLan; //Language
00037   private Vector<List<String>> vAnnotListSource; //(Major)
00038   private Vector<List<String>> vAnnotListGroup;
00039         private Vector<List<String>> vAnnotListFrom;
00040         private Vector<List<String>> vAnnotListTo;
00041         private Vector<List<String>> vAnnotListUrl;
00042   
00047   //private String encoding = null;
00048   
00049   public GateGazMed(String pWebInf){
00050         sWebInf=pWebInf;
00051         //List of anotations
00052                 annotTypesToWrite = new ArrayList<String>(1);
00053                 annotTypesToWrite.add("Medicine");
00054                 //Init vectors
00055                 vAnnotListMinor=new Vector<String>();
00056                 //Freebase
00057                 vAnnotListMinor.addElement("disease");
00058                 vAnnotListMinor.addElement("symptom");
00059                 vAnnotListMinor.addElement("treatment");
00060                 //Medlineplus
00061                 vAnnotListMinor.addElement("medlineplus");
00062                 //Snomed ct
00063                 vAnnotListMinor.addElement("snomed");
00064                 //
00065                 vAnnotList=new Vector<List<String>>(vAnnotListMinor.size());
00066                 vAnnotListLan=new Vector<List<String>>(vAnnotListMinor.size());
00067                 vAnnotListSource=new Vector<List<String>>(vAnnotListMinor.size());
00068                 vAnnotListGroup=new Vector<List<String>>(vAnnotListMinor.size());
00069                 vAnnotListFrom=new Vector<List<String>>(vAnnotListMinor.size());
00070                 vAnnotListTo=new Vector<List<String>>(vAnnotListMinor.size());
00071                 vAnnotListUrl=new Vector<List<String>>(vAnnotListMinor.size());
00072                 for (int i=0;i<vAnnotListMinor.size();i++){
00073                         vAnnotList.insertElementAt(new ArrayList<String>(), i);
00074                         vAnnotListLan.insertElementAt(new ArrayList<String>(), i);
00075                         vAnnotListSource.insertElementAt(new ArrayList<String>(), i);
00076                         vAnnotListGroup.insertElementAt(new ArrayList<String>(), i);
00077                         vAnnotListFrom.insertElementAt(new ArrayList<String>(), i);
00078                         vAnnotListTo.insertElementAt(new ArrayList<String>(), i);
00079                         vAnnotListUrl.insertElementAt(new ArrayList<String>(), i);
00080                 }
00081   }
00082 
00083   private void initGate(){
00084         try {
00085                   //System.out.println("gate.home: "+Gate.getGateHome());
00086                 if (Gate.getGateHome()==null){
00087                                 //Gate home, config -> webapp/WEB-INF/gate.xml, plugins -> webapp/WEB-INF/plugins
00088                         File gateHome = new File(sWebInf);
00089                                 Gate.setGateHome(gateHome); 
00090                                 // user config -> webapp/WEB-INF/user-gate.xml 
00091                                 Gate.setUserConfigFile(new File(gateHome, "user-gate.xml"));
00092                 }
00093                 //System.out.println("Is Gate initialized? "+Gate.isInitialised());
00094                         if (!Gate.isInitialised())
00095                                 Gate.init(); 
00096 
00097         } catch (Exception e) {
00098                         e.printStackTrace();
00099                 }
00100   }
00101 
00102   public void runGazetteer(String sDocText,String sGapp){
00103         try {
00104         initGate(); 
00105                   // load the gapp application
00106             CorpusController application =
00107               //(CorpusController)PersistenceManager.loadObjectFromFile(new File(sWebInf+"/CLEiM.gapp"));
00108                 (CorpusController)PersistenceManager.loadObjectFromFile(new File(sWebInf+"/"+sGapp));
00109             // Create a temporally Corpus, process and clear
00110             Corpus corpus = Factory.newCorpus("Medicine Corpus");
00111             application.setCorpus(corpus);
00112             Document doc = Factory.newDocument(sDocText);
00113             corpus.add(doc);
00114             application.execute();
00115             // remove the document from the corpus again
00116             corpus.clear();
00117             
00118             // Extract annotations
00119             Iterator<String> annotTypesIt = annotTypesToWrite.iterator();
00120             while(annotTypesIt.hasNext()) {
00121               String annotSelected=annotTypesIt.next();
00122                 AnnotationSet annotsOfThisType = doc.getAnnotations(annotSelected);
00123                 if(annotsOfThisType != null) {
00124                 for (int i=0; i<annotsOfThisType.size();i++){
00125                         Annotation annot=annotsOfThisType.get(i);
00126                         if (annot!=null){
00127                                 FeatureMap features=annot.getFeatures();
00128                                 //String anotName=(String)features.get("name");
00129                                 String anotLan=(String)features.get("language");
00130                                 String minorType=(String)features.get("minorType");
00131                                 String majorType=(String)features.get("majorType");
00132                                 String groups=(String)features.get("groups");
00133                                 String url=(String)features.get("url");
00134                                 String from=annot.getStartNode().getOffset().toString();
00135                                 String to=annot.getEndNode().getOffset().toString();
00136                                 String anotName=sDocText.substring(new Integer(from), new Integer(to));
00137                                 int j=vAnnotListMinor.indexOf(minorType);
00138                                 if (j>-1){
00139                                         vAnnotList.get(j).add(anotName.toUpperCase());
00140                                         vAnnotListLan.get(j).add(anotLan);
00141                                         vAnnotListSource.get(j).add(majorType);
00142                                         vAnnotListGroup.get(j).add(groups);
00143                                         vAnnotListFrom.get(j).add(from);
00144                                         vAnnotListTo.get(j).add(to);
00145                                         vAnnotListUrl.get(j).add(url);
00146                         }
00147                         }
00148                 }
00149                 }
00150             }
00151             Factory.deleteResource(doc);
00152             Factory.deleteResource(application); 
00153     } catch (GateException e) {
00154                         e.printStackTrace();
00155     } catch (IOException e) {
00156                         e.printStackTrace();
00157                 }
00158   }
00159   public Vector<String> getMinor(){
00160         return this.vAnnotListMinor;
00161   }
00162   public Vector<List<String>> getAnnot(){
00163         return this.vAnnotList;
00164   }
00165   public Vector<List<String>> getAnnotLan(){
00166         return this.vAnnotListLan;
00167   }
00168   public Vector<List<String>> getAnnotSource(){
00169         return this.vAnnotListSource;
00170   }
00171   public Vector<List<String>> getAnnotGroup(){
00172         return this.vAnnotListGroup;
00173   }
00174   public Vector<List<String>> getAnnotFrom(){
00175         return this.vAnnotListFrom;
00176   }
00177   public Vector<List<String>> getAnnotTo(){
00178         return this.vAnnotListTo;
00179   }
00180   public Vector<List<String>> getAnnotUrl(){
00181         return this.vAnnotListUrl;
00182   }
00183   public String getXmlGate(){
00184         return this.xmlGate;
00185   }
00186   public static void main(String args[]) {
00187         String path="WebContent/WEB-INF";
00188         String sGapp="CLEiM.gapp";
00189                 String text= "UNDERLYING MEDICAL CONDITION: "+
00190     " 56 year old man with history of colon and testicular cancer, pons glioma now"+  
00191     " with significant hilar lymphadeonpathry, pulmonary pathology on CXR."+
00192     " REASON FOR THIS EXAMINATION: extracto de genciana colirio/gotas óticas"+
00193     " vagina no hiperestésica  56 year old man with history of colon and testicular cancer, pons glioma now"+  
00194     " Salmonella Irumu SIDA con visión deficiente with significant hilar lymphadeonpathry, pulmonary pathology on CXR."+
00195     " Suture of wound of forelimb Lumbar chemical sympathectomy"+
00196     " vía fístula mucosa No contraindications for IV contrast  pain back pain aspirin cáncer de próstata";
00197                 GateGazMed gateMed=new GateGazMed(path);
00198                 gateMed.runGazetteer(text,sGapp);
00199                 Vector<String> vMinor=gateMed.getMinor();
00200                 Vector<List<String>> vAnnot=gateMed.getAnnot();
00201                 Vector<List<String>> vAnnotSource=gateMed.getAnnotSource();
00202                 Vector<List<String>> vAnnotGroup=gateMed.getAnnotGroup();
00203                 Vector<List<String>> vAnnotFrom=gateMed.getAnnotFrom();
00204                 Vector<List<String>> vAnnotTo=gateMed.getAnnotTo();
00205                 Vector<List<String>> vAnnotUrl=gateMed.getAnnotUrl();
00206                 for (int i=0;i<vMinor.size();i++){
00207                         System.out.println("Minor: "+vMinor.get(i));
00208                         for (int j=0;j<vAnnot.get(i).size();j++){
00209                                 System.out.println("->Annot: "+vAnnot.get(i).get(j));
00210                                 System.out.println("->AnnotSource: "+vAnnotSource.get(i).get(j));
00211                                 System.out.println("->AnnotGroup: "+vAnnotGroup.get(i).get(j));
00212                                 System.out.println("->AnnotFrom: "+vAnnotFrom.get(i).get(j));
00213                                 System.out.println("->AnnotTo: "+vAnnotTo.get(i).get(j));
00214                                 System.out.println("->AnnotUrl: "+vAnnotUrl.get(i).get(j));
00215                         }
00216                 }
00217         }
00218 }