Changeset 2559
- Timestamp:
- 11/03/08 16:39:41 (2 months ago)
- Files:
Legend:
- Unmodified
- Added
- Removed
- Modified
- Copied
- Moved
trunk/plugins/preprocessing/edu.iu.nwb.preprocessing.text.normalization/src/edu/iu/nwb/preprocessing/text/normalization/StandardNormalyzerFactory.java
r2233 r2559
    1     1    package edu.iu.nwb.preprocessing.text.normalization;
    2     2
          3  + import java.io.BufferedReader;
    3     4    import java.io.IOException;
          5  + import java.io.InputStream;
          6  + import java.io.InputStreamReader;
    4     7    import java.util.ArrayList;
    5     8    import java.util.Dictionary;
    6     9    import java.util.List;
         10  + import java.net.URL;
         11  + import java.net.URLConnection;
    7    12
    8    13    import org.cishell.framework.CIShellContext;
    …     …
   13    18    import org.cishell.reference.service.metatype.BasicAttributeDefinition;
   14    19    import org.cishell.reference.service.metatype.BasicObjectClassDefinition;
         20  + import org.osgi.framework.BundleContext;
         21  + import org.osgi.service.component.ComponentContext;
   15    22    import org.osgi.service.metatype.AttributeDefinition;
   16    23    import org.osgi.service.metatype.ObjectClassDefinition;
    …     …
   23    30
   24    31        protected static final String PREFIX = "column_";
   25        -
   26        -     public Algorithm createAlgorithm(Data[] data, Dictionary parameters, CIShellContext context) {
   27        -         return new StandardNormalyzer(data, parameters, context);
         32  +     private static final String stopWordsFilePath =
         33  +         "/edu/iu/nwb/preprocessing/text/normalization/stopwords.txt";
         34  +
         35  +     private BundleContext bContext;
         36  +     private String[] stopWords = null;
         37  +
         38  +     protected void activate(ComponentContext ctxt) {
         39  +         bContext = ctxt.getBundleContext();
   28    40        }
         41  +
         42  +     public Algorithm createAlgorithm(Data[] data, Dictionary parameters,
         43  +             CIShellContext context) {
         44  +         URL filePath = bContext.getBundle().getResource(stopWordsFilePath);
         45  +         stopWords = getStopWords (filePath);
         46  +         return new StandardNormalyzer(data, parameters, context, stopWords);
         47  +     }
   29    48
   30    49        public ObjectClassDefinition mutateParameters(Data[] data,
    …     …
   33    52
   34    53            ObjectClassDefinition oldDefinition = parameters;
   35        -
   36        -
   37        -
   38    54            String[] columnNames = createKeyArray(t.getSchema());
   39    55
    …     …
   78    94            return (String[]) keys.toArray(new String[]{});
   79    95        }
         96  +
         97  +     private String[] getStopWords(URL filePathURL) {
         98  +
         99  +         InputStream inStream = null;
        100  +         BufferedReader input = null;
        101  +         String line;
        102  +         ArrayList list = new ArrayList();
        103  +         String[]stopWords = null;
        104  +
        105  +         try {
        106  +             URLConnection connection = filePathURL.openConnection();
        107  +             connection.setDoInput(true);
        108  +             inStream = connection.getInputStream();
        109  +             input = new BufferedReader(new InputStreamReader(inStream, "UTF-8"));
        110  +
        111  +             while (null != (line = input.readLine())) {
        112  +                 list.add(line);
        113  +             }
        114  +             stopWords = new String[list.size()];
        115  +             for (int ii=0; ii<list.size(); ii++){
        116  +                 stopWords[ii] = (String) list.get(ii);
        117  +                 System.out.println(">>Debug: index = "+ii+", value = "+stopWords[ii]);
        118  +             }
        119  +         }
        120  +         catch (Exception e) {
        121  +             e.printStackTrace();
        122  +         }
        123  +         finally {
        124  +             try {
        125  +                 if (input != null) input.close();
        126  +                 if (inStream != null) inStream.close();
        127  +             }
        128  +             catch (IOException e) {
        129  +                 e.printStackTrace();
        130  +             }
        131  +         }
        132  +         return stopWords;
        133  +
        134  +     }
        135  +
        136  +
        137  +
   80   138    }
