-
Notifications
You must be signed in to change notification settings - Fork 0
Expand file tree
/
Copy pathFileSourceOperator.java
More file actions
142 lines (129 loc) · 4.54 KB
/
FileSourceOperator.java
File metadata and controls
142 lines (129 loc) · 4.54 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
package operators;
import utils.WordSplitter;
import workers.DataTuple;
import workers.storage.IKVStorage;
import java.io.*;
import java.util.ArrayList;
import java.util.List;
/**
 * Source operator that emits the words of a text file one tuple at a time.
 *
 * <p>The constructor opens the file and pre-loads the words of up to
 * {@code WARM_UP_LINES} lines. Each call to
 * {@link #process(IKVStorage, DataTuple)} then emits the next word, reading
 * further lines from the file on demand. When the file is exhausted the
 * operator either starts over from the beginning (loop mode) or emits a single
 * {@code "END_OF_FILE"} sentinel tuple and returns {@code null} afterwards.
 *
 * <p>Lines are turned into words by {@code WordSplitter.split}; lines that
 * yield no words are skipped.
 */
public class FileSourceOperator extends SourceOperator {
    /** Payload of the sentinel tuple emitted once when the file is exhausted. */
    private static final String END_OF_FILE_MARKER = "END_OF_FILE";

    /** Maximum number of lines pre-loaded into the word buffer by the constructor. */
    private static final int WARM_UP_LINES = 1000;

    private final String filePath;

    /** Words waiting to be emitted; {@code null} once the source is exhausted. */
    private List<String> wordBuffer;

    private BufferedReader reader;

    /** Index in {@link #wordBuffer} of the next word to emit. */
    private int wordBufferIndex;

    private boolean loop = false;

    /** Set once the end-of-file sentinel has been emitted. */
    private boolean endOfFile = false;

    /**
     * Opens {@code filePath} and pre-loads the first lines into the word buffer.
     *
     * @param filePath path of the text file to read
     * @throws IOException if the file cannot be opened or read during warm-up
     */
    public FileSourceOperator(String filePath) throws IOException {
        this.filePath = filePath;
        this.wordBuffer = new ArrayList<>();
        this.wordBufferIndex = 0;
        loadFile();
        try {
            warmUp();
        } catch (IOException e) {
            // Do not leak the file handle when construction fails.
            try {
                reader.close();
            } catch (IOException closeFailure) {
                e.addSuppressed(closeFailure);
            }
            throw e;
        }
    }

    /**
     * Enables or disables loop mode.
     *
     * @param loop {@code true} to restart from the beginning of the file when
     *             its end is reached, {@code false} to emit the end-of-file
     *             sentinel instead
     */
    public void setLoop(boolean loop) {
        this.loop = loop;
    }

    /**
     * @return {@code true} if the operator restarts the file at its end
     */
    public boolean getLoop() {
        return loop;
    }

    /**
     * (Re)opens the file, closing any reader that is still open so that
     * repeated restarts in loop mode do not leak file handles.
     */
    private void loadFile() throws IOException {
        if (reader != null) {
            reader.close();
        }
        File file = new File(filePath);
        reader = new BufferedReader(new FileReader(file));
    }

    /**
     * Reads the next line and splits it into words.
     *
     * @return the words of the next line (possibly empty), or {@code null} at
     *         end of file
     */
    private ArrayList<String> readLine() throws IOException {
        String line = reader.readLine();
        if (line == null) {
            return null;
        }
        return new ArrayList<>(List.of(WordSplitter.split(line)));
    }

    /**
     * Reads lines until one yields at least one word.
     *
     * @return the words of that line, or {@code null} at end of file
     */
    private List<String> nextNonEmptyLine() throws IOException {
        List<String> words = readLine();
        while (words != null && words.isEmpty()) {
            words = readLine();
        }
        return words;
    }

    /**
     * Pre-loads the words of up to {@code WARM_UP_LINES} lines into the word
     * buffer, stopping early if the file is shorter than that.
     */
    private void warmUp() throws IOException {
        wordBuffer = new ArrayList<>();
        for (int i = 0; i < WARM_UP_LINES; i++) {
            List<String> words = readLine();
            if (words == null) {
                // File has fewer lines than the warm-up budget.
                break;
            }
            wordBuffer.addAll(words);
        }
    }

    /**
     * Replaces the exhausted word buffer with the words of the next non-empty
     * line, restarting the file first when loop mode is on.
     *
     * @return {@code true} if a word is now available at
     *         {@code wordBufferIndex}, {@code false} if the source is exhausted
     *         (in which case the reader is closed and the buffer is
     *         {@code null})
     */
    private boolean refill() throws IOException {
        // A null buffer means the reader was already closed at end of file,
        // so it must not be read from again before the file is reopened.
        List<String> next = (wordBuffer == null) ? null : nextNonEmptyLine();
        if (next == null && loop) {
            loadFile();
            next = nextNonEmptyLine();
        }
        if (next == null) {
            // Either loop mode is off, or the file contains no words at all
            // (restarting it again could never produce a word).
            reader.close();
            wordBuffer = null;
            return false;
        }
        wordBuffer = next;
        wordBufferIndex = 0;
        return true;
    }

    /**
     * Emits the next word of the file.
     *
     * @param storage   not used by this operator
     * @param dataTuple template tuple; its fields are copied into the result
     *                  before data, count and timestamp are overwritten
     * @return a tuple carrying the next word with count {@code 0}; the
     *         {@code "END_OF_FILE"} sentinel tuple the first time the source is
     *         exhausted; {@code null} on every exhausted call after that
     * @throws UncheckedIOException if reading the file fails
     */
    @Override
    public DataTuple process(IKVStorage storage, DataTuple dataTuple) {
        DataTuple.Builder builder = DataTuple.newBuilder(dataTuple);
        try {
            boolean bufferExhausted = wordBuffer == null || wordBufferIndex >= wordBuffer.size();
            if (bufferExhausted && !refill()) {
                return cleanUp();
            }
        } catch (IOException e) {
            throw new UncheckedIOException("Failed to read words from " + filePath, e);
        }
        String word = wordBuffer.get(wordBufferIndex);
        builder.setData(word);
        builder.setCount(0);
        builder.setTimestamp(System.nanoTime()); // This is used for critical analysis
        wordBufferIndex++;
        return builder.build();
    }

    /**
     * Produces the end-of-file sentinel exactly once.
     *
     * @return the {@code "END_OF_FILE"} tuple on the first call, {@code null}
     *         on later calls
     */
    private DataTuple cleanUp() {
        if (endOfFile) {
            return null;
        }
        endOfFile = true;
        DataTuple.Builder builder = DataTuple.newBuilder();
        builder.setData(END_OF_FILE_MARKER);
        builder.setTimestamp(System.nanoTime()); // This is used for critical analysis
        return builder.build();
    }

    /**
     * Emits one word per input tuple, stopping as soon as the source reports
     * end of file.
     *
     * @param storage    not used by this operator
     * @param dataTuples template tuples, one per word to emit
     * @return the produced tuples in order; the list ends with the
     *         {@code "END_OF_FILE"} sentinel if the source was exhausted during
     *         this call, and never contains {@code null}
     */
    @Override
    public List<DataTuple> process(IKVStorage storage, List<DataTuple> dataTuples) {
        List<DataTuple> result = new ArrayList<>();
        for (DataTuple dataTuple : dataTuples) {
            DataTuple produced = process(storage, dataTuple);
            if (produced == null) {
                // The sentinel was already emitted by an earlier call.
                break;
            }
            result.add(produced);
            // Stop on the sentinel we just produced; an incoming sentinel is
            // still honoured for callers that relied on the previous check.
            if (END_OF_FILE_MARKER.equals(produced.getData())
                    || END_OF_FILE_MARKER.equals(dataTuple.getData())) {
                break;
            }
        }
        return result;
    }

    /**
     * Manual smoke test: prints every word of a sample file until the
     * end-of-file sentinel is produced.
     */
    public static void main(String[] args) throws IOException {
        DataTuple dataTuple = DataTuple.newBuilder().setReconfigRequest("none").setData("INIT").setCount(1).build();
        FileSourceOperator fileSourceOperator = new FileSourceOperator(
                "dataflow/TheCompleteWorksOfWilliamShakespearebyWilliamShakespeare.txt");
        while (!dataTuple.getData().equals(END_OF_FILE_MARKER)) {
            dataTuple = fileSourceOperator.process(null, dataTuple);
            System.out.println(dataTuple.getData() + " " + dataTuple.getCount());
        }
    }
}