My favorites | Sign in
Project Home Downloads Wiki Issues Source
Checkout   Browse   Changes    
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
package org.gbif.solr.handler.dataimport;

import org.apache.solr.handler.dataimport.Context;
import org.apache.solr.handler.dataimport.DataImporter;
import org.apache.solr.handler.dataimport.DateFormatTransformer;
import org.apache.solr.handler.dataimport.RegexTransformer;

import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Calendar;
import java.util.Date;
import java.util.List;
import java.util.Map;
import java.util.logging.Level;
import java.util.logging.Logger;

/**
* Transformer class that handles date ranges and date lists, for example: 1982-1998, 01/01/1999-02/02/2010.
* The class uses the following entity-field parameters: "listDateTimeFormat", "selectedDatePosition", "separator" and "lastDay".
* listDateTimeFormat: the date pattern (java.text.SimpleDateFormat syntax) used to parse the dates; fields without this parameter are skipped.
* separator: the separator between the dates of the range/list; for example, in 1900-2010 the separator is "-".
* selectedDatePosition: the 1-based position of the date to use within the range/list; for example, for 1900-2010 with position 2 and separator "-", 2010 is processed because it is the second date in the range.
* lastDay: true or false, indicating whether a year-only date must be set to the end of that year. If true, December 31, 23:59:59 is used; otherwise January 1st, 00:00 is used.
* If "selectedDatePosition", "separator" or "lastDay" is missing, the value is processed as a normal date transformation.
* @author Federico Mendez
*
*/
public class ListDateFormatTransformer extends DateFormatTransformer {

  /** Field attribute holding the {@link SimpleDateFormat} pattern used to parse the dates. */
  public static final String DATE_TIME_FMT = "listDateTimeFormat";

  /** Field attribute holding the 1-based position of the date to pick from the range/list. */
  public static final String DATE_POSITION = "selectedDatePosition";

  /** Field attribute holding the separator (applied as a regular expression by {@link String#split(String)}). */
  public static final String LIST_SEPARATOR = "separator";

  /** Field attribute ("true"/"false") selecting the end or the start of the year for year-only dates. */
  public static final String LAST_DAY = "lastDay";

  private static final Logger LOG = Logger.getLogger(ListDateFormatTransformer.class.getName());

  public ListDateFormatTransformer() {
    super();
  }

  /**
   * Converts, for every entity field that declares {@link #DATE_TIME_FMT}, the source column value
   * (a single value or a {@link List} of values) into {@link Date} objects and stores the result in
   * the target column.
   * <p>
   * When any of {@link #DATE_POSITION}, {@link #LIST_SEPARATOR} or {@link #LAST_DAY} is missing, the
   * value is parsed as one plain date. A value that cannot be parsed is logged and the column is
   * left untouched; for a list, a single unparseable element leaves the whole column untouched.
   *
   * @param aRow the row being transformed; modified in place
   * @param context the import context supplying the entity field definitions
   * @return the same {@code aRow} instance
   * @throws NumberFormatException if {@link #DATE_POSITION} is configured but is not an integer
   */
  public Object transformRow(Map<String, Object> aRow, Context context) {
    for (Map<String, String> map : context.getAllEntityFields()) {
      String fmt = map.get(DATE_TIME_FMT);
      if (fmt == null) {
        // Without a parse pattern this transformer does not apply to the field.
        continue;
      }
      String position = map.get(DATE_POSITION);
      String isLastDay = map.get(LAST_DAY);
      String separator = map.get(LIST_SEPARATOR);
      String column = map.get(DataImporter.COLUMN);
      String srcCol = map.get(RegexTransformer.SRC_COL_NAME);
      if (srcCol == null) {
        srcCol = column;
      }
      try {
        Object o = aRow.get(srcCol);
        if (o instanceof List) {
          List<?> inputs = (List<?>) o;
          List<Date> results = new ArrayList<Date>(inputs.size());
          for (Object input : inputs) {
            results.add(convert(input, fmt, position, separator, isLastDay));
          }
          aRow.put(column, results);
        } else {
          aRow.put(column, convert(o, fmt, position, separator, isLastDay));
        }
      } catch (ParseException e) {
        LOG.log(Level.WARNING, "Could not parse a Date field " + column, e);
      }
    }
    return aRow;
  }

  /**
   * Converts one raw column value, choosing between the plain and the range/list transformation.
   * Non-String values are converted with {@code toString()} instead of being cast, so an unexpected
   * column type results in a logged parse failure rather than a {@link ClassCastException}.
   */
  private Date convert(Object raw, String fmt, String position, String separator, String isLastDay)
      throws ParseException {
    String value = raw == null ? null : raw.toString();
    if (position == null || isLastDay == null || separator == null) {
      // If these parameters are not found the value is processed as a normal date transformation.
      return process(value, fmt);
    }
    return process(value, fmt, Integer.parseInt(position), separator, Boolean.parseBoolean(isLastDay));
  }

  /**
   * Processes the value using the full set of parameters: splits it on {@code separator}, picks the
   * term at {@code position} (parentheses removed) and converts that term to a date.
   * <p>
   * A term made of exactly four digits is treated as a year and mapped to December 31, 23:59:59
   * ({@code lastDay == true}) or January 1st, 00:00:00 of that year. Any other term is parsed with
   * {@code format}.
   *
   * @param value the raw range/list, e.g. {@code 01/01/1999-02/02/2010}
   * @param format the {@link SimpleDateFormat} pattern of a full date
   * @param position 1-based index of the term to use
   * @param separator separator between the terms; interpreted as a regular expression
   * @param lastDay whether a year-only term resolves to the end of the year
   * @return the date, or {@code null} if the value is blank or has no term at {@code position}
   * @throws ParseException if the selected term is neither a four-digit year nor matches {@code format}
   */
  private Date process(String value, String format, int position, String separator, boolean lastDay)
      throws ParseException {
    if (value == null || value.trim().length() == 0) {
      return null;
    }

    String[] terms = value.trim().split(separator);
    if (position < 1 || position > terms.length) {
      return null;
    }

    String dateSelected = terms[position - 1].replaceAll("\\(|\\)", "").trim();
    Date resultDate;
    if (isYearOnly(dateSelected)) {
      Calendar cal = Calendar.getInstance();
      // Drop the current time of day and milliseconds that getInstance() initialises.
      cal.clear();
      int year = Integer.parseInt(dateSelected);
      if (lastDay) {
        cal.set(year, Calendar.DECEMBER, 31, 23, 59, 59);
      } else {
        cal.set(year, Calendar.JANUARY, 1);
      }
      // Returned directly: formatting and re-parsing with the user pattern would throw away
      // whatever the pattern does not contain (e.g. the 23:59:59, or the month and day for "yyyy").
      resultDate = cal.getTime();
    } else {
      // Parse the selected term, not the whole input: parse() reads from the start of its
      // argument and ignores trailing text, which would always yield the first date.
      resultDate = new SimpleDateFormat(format).parse(dateSelected);
    }

    if (LOG.isLoggable(Level.FINE)) {
      LOG.log(Level.FINE, "Date result " + resultDate);
    }
    return resultDate;
  }

  /**
   * Processes the value as a normal date transformation.
   *
   * @param value the raw date text
   * @param format the {@link SimpleDateFormat} pattern of the date
   * @return the parsed date, or {@code null} if the value is {@code null} or blank
   * @throws ParseException if the value does not match {@code format}
   */
  private Date process(String value, String format) throws ParseException {
    if (value == null || value.trim().length() == 0) {
      return null;
    }
    return new SimpleDateFormat(format).parse(value.trim());
  }

  /** Tells whether the token consists of exactly four ASCII digits, i.e. is a bare year. */
  private static boolean isYearOnly(String token) {
    if (token.length() != 4) {
      return false;
    }
    for (int i = 0; i < token.length(); i++) {
      char c = token.charAt(i);
      if (c < '0' || c > '9') {
        return false;
      }
    }
    return true;
  }
}

Change log

r357 by federicomh on Apr 29, 2011   Diff
Harvester was refactored, PortalHarvester
and URLHarvester were moved to new files
Go to: 
Project members, sign in to write a code review

Older revisions

All revisions of this file

File info

Size: 5005 bytes, 135 lines
Powered by Google Project Hosting