CLEiM
Cross Lingual Education in Medicine
|
package com.uem.gsi.cleim.nlp;

import gate.Annotation;
import gate.Document;
import gate.Corpus;
import gate.CorpusController;
import gate.AnnotationSet;
import gate.FeatureMap;
import gate.Gate;
import gate.Factory;
import gate.util.*;
import gate.util.persistence.PersistenceManager;

import java.util.List;
import java.util.ArrayList;
import java.util.Collections;
import java.util.Iterator;
import java.util.Locale;
import java.util.Vector;

import java.io.File;
import java.io.IOException;

/**
 * Runs a saved GATE application (a .gapp file) over a piece of text and
 * collects the resulting annotations, grouped by their "minorType" feature.
 *
 * <p>The results are exposed as parallel vectors: index {@code i} of every
 * vector returned by the {@code getAnnot*} methods corresponds to the minor
 * type at index {@code i} of {@link #getMinor()}, and position {@code j}
 * inside each of those lists describes the same annotation.
 *
 * <p>Results accumulate: each call to {@link #runGazetteer(String, String)}
 * appends to the lists already held by this instance.
 */
public class GateGazMed {

    private String xmlGate = "";
    private String sWebInf = "";
    private List<String> annotTypesToWrite;
    private Vector<String> vAnnotListMinor;
    private Vector<List<String>> vAnnotList;
    private Vector<List<String>> vAnnotListLan;    // "language" feature
    private Vector<List<String>> vAnnotListSource; // "majorType" feature
    private Vector<List<String>> vAnnotListGroup;
    private Vector<List<String>> vAnnotListFrom;
    private Vector<List<String>> vAnnotListTo;
    private Vector<List<String>> vAnnotListUrl;

    /**
     * Creates a collector with one empty result bucket per known minor type.
     *
     * @param pWebInf directory used as GATE home (when none is configured yet)
     *                and as the base directory for the .gapp file
     */
    public GateGazMed(String pWebInf) {
        sWebInf = pWebInf;

        // Names of the annotation sets that are read back from the document.
        annotTypesToWrite = new ArrayList<String>(1);
        annotTypesToWrite.add("Medicine");

        // Minor types that are collected; anything else is ignored.
        vAnnotListMinor = new Vector<String>();
        // Freebase
        vAnnotListMinor.addElement("disease");
        vAnnotListMinor.addElement("symptom");
        vAnnotListMinor.addElement("treatment");
        // Medlineplus
        vAnnotListMinor.addElement("medlineplus");
        // Snomed ct
        vAnnotListMinor.addElement("snomed");

        int bucketCount = vAnnotListMinor.size();
        vAnnotList = newBuckets(bucketCount);
        vAnnotListLan = newBuckets(bucketCount);
        vAnnotListSource = newBuckets(bucketCount);
        vAnnotListGroup = newBuckets(bucketCount);
        vAnnotListFrom = newBuckets(bucketCount);
        vAnnotListTo = newBuckets(bucketCount);
        vAnnotListUrl = newBuckets(bucketCount);
    }

    /** Builds a vector holding {@code count} empty, independent lists. */
    private static Vector<List<String>> newBuckets(int count) {
        Vector<List<String>> buckets = new Vector<List<String>>(count);
        for (int i = 0; i < count; i++) {
            buckets.add(new ArrayList<String>());
        }
        return buckets;
    }

    /**
     * Points GATE at {@code sWebInf} when no GATE home is set yet and
     * initialises the library if that has not happened already. Failures are
     * printed to standard error and otherwise ignored (best effort).
     */
    private void initGate() {
        try {
            if (Gate.getGateHome() == null) {
                // Gate home, config -> webapp/WEB-INF/gate.xml, plugins -> webapp/WEB-INF/plugins
                File gateHome = new File(sWebInf);
                Gate.setGateHome(gateHome);
                // user config -> webapp/WEB-INF/user-gate.xml
                Gate.setUserConfigFile(new File(gateHome, "user-gate.xml"));
            }
            if (!Gate.isInitialised()) {
                Gate.init();
            }
        } catch (Exception e) {
            e.printStackTrace();
        }
    }

    /**
     * Loads the application {@code sGapp} from the WEB-INF directory, runs it
     * over {@code sDocText} and appends every annotation whose "minorType"
     * feature is one of {@link #getMinor()} to the result vectors.
     *
     * <p>GATE and I/O errors are printed to standard error; the GATE resources
     * created for the run are released in every case.
     *
     * @param sDocText text to annotate
     * @param sGapp    file name of the saved application, relative to the
     *                 directory given to the constructor
     */
    public void runGazetteer(String sDocText, String sGapp) {
        CorpusController application = null;
        Corpus corpus = null;
        Document doc = null;
        try {
            initGate();
            // load the gapp application
            application = (CorpusController) PersistenceManager.loadObjectFromFile(
                    new File(sWebInf + "/" + sGapp));
            // Create a temporary corpus, process and clear
            corpus = Factory.newCorpus("Medicine Corpus");
            application.setCorpus(corpus);
            doc = Factory.newDocument(sDocText);
            corpus.add(doc);
            application.execute();
            // remove the document from the corpus again
            corpus.clear();

            // Extract annotations
            for (String annotSelected : annotTypesToWrite) {
                AnnotationSet annotsOfThisType = doc.getAnnotations(annotSelected);
                if (annotsOfThisType == null) {
                    continue;
                }
                // AnnotationSet.get(Integer) looks an annotation up by its ID,
                // not by position, so counting from 0 to size()-1 can miss
                // annotations. Iterate the set itself, in document order.
                List<Annotation> ordered = new ArrayList<Annotation>(annotsOfThisType);
                Collections.sort(ordered, new OffsetComparator());
                for (Annotation annot : ordered) {
                    if (annot != null) {
                        collectAnnotation(annot, sDocText);
                    }
                }
            }
        } catch (GateException e) {
            e.printStackTrace();
        } catch (IOException e) {
            e.printStackTrace();
        } finally {
            // Release GATE resources even when loading or execution failed.
            if (doc != null) {
                Factory.deleteResource(doc);
            }
            if (corpus != null) {
                Factory.deleteResource(corpus);
            }
            if (application != null) {
                Factory.deleteResource(application);
            }
        }
    }

    /**
     * Appends one annotation to the bucket of its "minorType" feature; the
     * annotation is ignored when that minor type is not a collected one.
     */
    private void collectAnnotation(Annotation annot, String sDocText) {
        FeatureMap features = annot.getFeatures();
        String anotLan = (String) features.get("language");
        String minorType = (String) features.get("minorType");
        String majorType = (String) features.get("majorType");
        String groups = (String) features.get("groups");
        String url = (String) features.get("url");
        Long start = annot.getStartNode().getOffset();
        Long end = annot.getEndNode().getOffset();
        // The stored name is the covered text, not the "name" feature.
        String anotName = sDocText.substring(start.intValue(), end.intValue());

        int bucket = vAnnotListMinor.indexOf(minorType);
        if (bucket > -1) {
            // Locale.ROOT keeps the upper-casing independent of the JVM default locale.
            vAnnotList.get(bucket).add(anotName.toUpperCase(Locale.ROOT));
            vAnnotListLan.get(bucket).add(anotLan);
            vAnnotListSource.get(bucket).add(majorType);
            vAnnotListGroup.get(bucket).add(groups);
            vAnnotListFrom.get(bucket).add(start.toString());
            vAnnotListTo.get(bucket).add(end.toString());
            vAnnotListUrl.get(bucket).add(url);
        }
    }

    /** @return the collected minor types; their order defines the bucket index */
    public Vector<String> getMinor() {
        return this.vAnnotListMinor;
    }

    /** @return per minor type, the upper-cased text covered by each annotation */
    public Vector<List<String>> getAnnot() {
        return this.vAnnotList;
    }

    /** @return per minor type, the "language" feature of each annotation */
    public Vector<List<String>> getAnnotLan() {
        return this.vAnnotListLan;
    }

    /** @return per minor type, the "majorType" feature of each annotation */
    public Vector<List<String>> getAnnotSource() {
        return this.vAnnotListSource;
    }

    /** @return per minor type, the "groups" feature of each annotation */
    public Vector<List<String>> getAnnotGroup() {
        return this.vAnnotListGroup;
    }

    /** @return per minor type, the start offset of each annotation as a string */
    public Vector<List<String>> getAnnotFrom() {
        return this.vAnnotListFrom;
    }

    /** @return per minor type, the end offset of each annotation as a string */
    public Vector<List<String>> getAnnotTo() {
        return this.vAnnotListTo;
    }

    /** @return per minor type, the "url" feature of each annotation */
    public Vector<List<String>> getAnnotUrl() {
        return this.vAnnotListUrl;
    }

    /** @return the xmlGate field; nothing in this class assigns it, so it is the empty string */
    public String getXmlGate() {
        return this.xmlGate;
    }

    /** Manual smoke test: annotates a sample text and prints what was collected. */
    public static void main(String args[]) {
        String path = "WebContent/WEB-INF";
        String sGapp = "CLEiM.gapp";
        String text = "UNDERLYING MEDICAL CONDITION: "+
            " 56 year old man with history of colon and testicular cancer, pons glioma now"+
            " with significant hilar lymphadeonpathry, pulmonary pathology on CXR."+
            " REASON FOR THIS EXAMINATION: extracto de genciana colirio/gotas óticas"+
            " vagina no hiperestésica 56 year old man with history of colon and testicular cancer, pons glioma now"+
            " Salmonella Irumu SIDA con visión deficiente with significant hilar lymphadeonpathry, pulmonary pathology on CXR."+
            " Suture of wound of forelimb Lumbar chemical sympathectomy"+
            " vía fístula mucosa No contraindications for IV contrast pain back pain aspirin cáncer de próstata";
        GateGazMed gateMed = new GateGazMed(path);
        gateMed.runGazetteer(text, sGapp);
        Vector<String> vMinor = gateMed.getMinor();
        Vector<List<String>> vAnnot = gateMed.getAnnot();
        Vector<List<String>> vAnnotSource = gateMed.getAnnotSource();
        Vector<List<String>> vAnnotGroup = gateMed.getAnnotGroup();
        Vector<List<String>> vAnnotFrom = gateMed.getAnnotFrom();
        Vector<List<String>> vAnnotTo = gateMed.getAnnotTo();
        Vector<List<String>> vAnnotUrl = gateMed.getAnnotUrl();
        for (int i = 0; i < vMinor.size(); i++) {
            System.out.println("Minor: "+vMinor.get(i));
            for (int j = 0; j < vAnnot.get(i).size(); j++) {
                System.out.println("->Annot: "+vAnnot.get(i).get(j));
                System.out.println("->AnnotSource: "+vAnnotSource.get(i).get(j));
                System.out.println("->AnnotGroup: "+vAnnotGroup.get(i).get(j));
                System.out.println("->AnnotFrom: "+vAnnotFrom.get(i).get(j));
                System.out.println("->AnnotTo: "+vAnnotTo.get(i).get(j));
                System.out.println("->AnnotUrl: "+vAnnotUrl.get(i).get(j));
            }
        }
    }
}