package parser2;
/**
 * Common contract for the parsing stages in this package.
 *
 * @param <T> type of the value produced by {@link #parse()}
 */
public interface CSVParser<T> {

    /**
     * Parses the input this parser is bound to.
     *
     * @return the parsed result
     */
    T parse();

    /**
     * Advances an incremental delimiter match by one character.
     *
     * <p>If {@code targetChar} equals {@code targetString[index]} the match grows and
     * {@code index + 1} is returned. On a mismatch the partial match is abandoned; the
     * current character is then re-tested against the first delimiter character, so that
     * input such as {@code "\r\r\n"} still matches the delimiter {@code "\r\n"}
     * (the second {@code '\r'} starts a fresh match instead of being dropped).
     *
     * <p>Only a restart at the first delimiter character is attempted; delimiters whose
     * proper prefix is also a longer suffix of themselves (e.g. {@code "aab"}) are not
     * fully handled.
     *
     * @param targetString the delimiter characters being matched
     * @param targetChar   the character just read from the input
     * @param index        number of delimiter characters matched so far
     * @return the new number of matched delimiter characters; {@code 0} when
     *         {@code targetChar} neither continues nor restarts a match
     */
    default int incrementIfMatch(char[] targetString, char targetChar, int index) {
        if (targetChar == targetString[index]) {
            return index + 1;
        }
        // The partial match failed; the current character may still begin a new one.
        return (index > 0 && targetChar == targetString[0]) ? 1 : 0;
    }

    /**
     * Reports whether this parser is able to handle the current input.
     *
     * @return {@code true} if {@link #parse()} applies to the current input
     */
    boolean canParse();
}
Java
복사
package parser2;
import lombok.AccessLevel;
import lombok.Getter;
import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.Objects;
/**
 * Package-private working storage for the parsers: a fixed-size read buffer that is
 * refilled from a {@link Reader}, plus a growable {@code row} array into which the
 * characters of the row currently being assembled are copied.
 *
 * <p>Getters for every field except {@code reader} are generated by Lombok.
 */
@Getter
class InternalBuffer {
    /** Source of characters; not exposed through a getter. */
    @Getter(AccessLevel.NONE)
    private final Reader reader;
    /** Read buffer that {@link #fill()} overwrites on each call. */
    private final char[] buf;
    /** Accumulated characters of the current row; allocated on first append. */
    private char[] row;
    /** Index in {@code buf} from which the next append copies. */
    private int pos;
    /** Result of the most recent {@code reader.read(buf)} call. */
    private int limit;
    /** Number of valid characters currently stored in {@code row}. */
    private int rowLimit;

    InternalBuffer(Reader reader, int size) {
        this.buf = new char[size];
        this.reader = reader;
    }

    /** @return {@code true} while {@code pos} is still below {@code limit} */
    boolean notPosAtLimit() {
        return pos < limit;
    }

    /**
     * Resets {@code pos} to 0 and reads the next chunk from the reader into {@code buf}.
     *
     * @return {@code false} once the reader reports end of stream ({@code -1})
     * @throws UncheckedIOException wrapping any {@link IOException} raised by the reader
     */
    boolean fill() {
        pos = 0;
        try {
            limit = reader.read(buf);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        return limit != -1;
    }

    public void setRowLimit(int rowLimit) {
        this.rowLimit = rowLimit;
    }

    void setNewPosition(int newPos) {
        pos = newPos;
    }

    /** @return the character stored at {@code cursor} in the read buffer */
    char charAt(int cursor) {
        return buf[cursor];
    }

    /**
     * Copies {@code length} characters starting at {@code buf[pos]} onto the end of the
     * row, allocating or enlarging the row array when required.
     *
     * @param length number of characters to copy from the read buffer
     */
    void mergeOrAppendChars(int length) {
        if (row == null) {
            // First use: the row is sized exactly to the incoming characters.
            row = new char[length];
            System.arraycopy(buf, pos, row, 0, length);
            rowLimit = length;
            return;
        }

        final int appendedLimit = rowLimit + length;
        if (row.length <= appendedLimit) {
            // Not enough room: move the valid prefix into a larger array, then append.
            final char[] grown = new char[(row.length + length) * 2];
            System.arraycopy(row, 0, grown, 0, rowLimit);
            System.arraycopy(buf, pos, grown, rowLimit, length);
            row = grown;
        } else {
            // Enough room: append in place (covers the rowLimit == 0 restart as well).
            System.arraycopy(buf, pos, row, rowLimit, length);
        }
        rowLimit = appendedLimit;
    }
}
Java
복사
package parser2;
import java.io.Reader;
import java.util.ArrayList;
import java.util.List;
/**
 * Splits the character stream of a {@link Reader} into rows and turns each row into a
 * {@link Record} by delegating to a {@link FieldParser}.
 */
public class RecordParser implements CSVParser<List<Record>> {
    private final InternalBuffer buffer;
    private final CSVConfig csvConfig;
    private final CSVParser<List<String>> fieldParser;

    /**
     * @param reader         source of CSV characters
     * @param bufferCapacity size of the internal read buffer; must be at least 1
     * @param csvConfig      quote / delimiter configuration
     * @throws IllegalArgumentException if {@code bufferCapacity} is less than 1
     */
    public RecordParser(Reader reader, int bufferCapacity ,CSVConfig csvConfig) {
        if (bufferCapacity <= 0) throw new IllegalArgumentException("Buffer capacity cannot be less than 1.");
        this.buffer = new InternalBuffer(reader, bufferCapacity);
        this.csvConfig = csvConfig;
        this.fieldParser = new FieldParser(buffer, new CommentParser(buffer, csvConfig),csvConfig);
    }

    /**
     * Reads the input chunk by chunk, cutting a row each time the line delimiter is seen
     * outside of quotes, and parses every row into a record.
     *
     * @return the records in input order
     */
    @Override
    public List<Record> parse() {
        final List<Record> records = new ArrayList<>();
        int order = 0;
        int matchIndex = 0;
        boolean inQuote = false;
        while (buffer.fill()) {
            int limit = buffer.getLimit();
            for (int cursor = 0; cursor < limit; cursor++) {
                char c = buffer.charAt(cursor);
                if (c == csvConfig.getQuote()) {
                    inQuote = !inQuote;
                    // A quote interrupts any partially matched line delimiter; without this
                    // reset, input like \r"x"\n would be treated as a row break.
                    matchIndex = 0;
                    continue;
                }
                if (inQuote) {
                    continue;
                }
                matchIndex = incrementIfMatch(csvConfig.getLineDelimiter(), c, matchIndex);
                if (matchIndex == csvConfig.getLineDelimiterLength()) {
                    // The delimiter is deliberately kept in the row: it may straddle two buffer
                    // fills (e.g. '\r' ends one chunk and '\n' starts the next), so subtracting
                    // its length from the cut position could go negative.
                    // The row chunk is always [pos, cursor]; the earlier formula
                    // cursor + matchIndex - 1 read past the cursor for delimiters longer than 2.
                    int length = cursor + 1 - buffer.getPos();
                    buffer.mergeOrAppendChars(length);
                    records.add(new Record(order++, false, fieldParser.parse()));
                    buffer.setNewPosition(cursor + 1);
                    matchIndex = 0;
                    buffer.setRowLimit(0);
                }
            }
            // Carry the unfinished tail of this chunk over to the next fill.
            if (buffer.notPosAtLimit()) {
                buffer.mergeOrAppendChars(buffer.getLimit() - buffer.getPos());
            }
        }
        // Input ended without a trailing line delimiter: emit what has been accumulated.
        if (buffer.getRowLimit() != 0){
            records.add(new Record(order, false, fieldParser.parse()));
            buffer.setRowLimit(0);
        }
        return records;
    }

    /**
     * Not supported by this parser.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean canParse() {
        throw new UnsupportedOperationException("this method is Unsupported method");
    }
}
Java
복사
package parser2;
import java.util.ArrayList;
import java.util.List;
/**
 * Splits the row currently held by the shared {@link InternalBuffer} into field strings.
 * Rows accepted by the supplied comment parser are delegated to it instead.
 */
public class FieldParser implements CSVParser<List<String>>{
    private final InternalBuffer buffer;
    private final CSVConfig csvConfig;
    private final CSVParser<List<String>> commentParser;
    // Scratch builder reused for every field; cleared after each field is emitted.
    private StringBuilder builder = new StringBuilder();

    public FieldParser(InternalBuffer buffer, CSVParser<List<String>> commentParser, CSVConfig csvConfig) {
        this.buffer = buffer;
        this.commentParser = commentParser;
        this.csvConfig = csvConfig;
    }

    /**
     * Parses the current row into its fields.
     *
     * @return the field values in row order, or the comment parser's result when it
     *         reports that it can parse the row
     */
    @Override
    public List<String> parse() {
        if (commentParser.canParse()) return commentParser.parse();
        final List<String> fields = new ArrayList<>();
        // NOTE(review): the scanned region ends at rowLimit minus the field-separator length;
        // presumably this is meant to drop the line delimiter that RecordParser leaves at the
        // end of the row -- confirm, since the two lengths can differ and the final row of the
        // input may carry no delimiter at all.
        int limit = buffer.getRowLimit() - csvConfig.getFieldSeparatorLength();
        int pos = 0;
        int matchIndex = 0;
        boolean inQuote = false;
        boolean isQuoteField = false;
        for (int cursor = 0; cursor < limit; cursor++) {
            char c = buffer.getRow()[cursor];
            if (c == csvConfig.getQuote()) {
                inQuote = !inQuote;
                isQuoteField = true;
            } else if (!inQuote && (matchIndex = incrementIfMatch(csvConfig.getFieldSeparator(), c, matchIndex)) == csvConfig.getFieldSeparatorLength()) {
                // Field content is [pos, start of the separator).
                int length = (matchIndex == 1 ? cursor : cursor - matchIndex + 1) - pos;
                if (isQuoteField) {
                    // quoteFieldParse expects an absolute end index; passing the relative
                    // length emptied every quoted field that did not start at index 0.
                    quoteFieldParse(pos, pos + length);
                    isQuoteField = false;
                } else {
                    builder.append(buffer.getRow(), pos, length);
                }
                addField(fields);
                builder.setLength(0);
                pos = cursor + 1;
                matchIndex = 0;
            }
        }
        if (isQuoteField) {
            quoteFieldParse(pos, limit);
        } else {
            // Covers both cases: with limit 5 and pos 3 the remaining characters are appended;
            // when pos equals limit the separator was the last character, so an empty field
            // is added.
            builder.append(buffer.getRow(), pos, limit - pos);
        }
        addField(fields);
        builder.setLength(0);
        return fields;
    }

    /**
     * Appends the content of a quoted field to the builder, skipping the first and last
     * character of the span and dropping the second quote of every pair of quote
     * characters found in between.
     *
     * @param pos index of the field's first character in the row (assumed to be the opening quote)
     * @param end index just past the field's last character (the last one is assumed to be the closing quote)
     */
    private void quoteFieldParse(int pos, int end) {
        int doubleQuote = 0; // quote characters seen since the last flush
        pos = pos + 1; // skip the first character -- TODO: it is assumed to be the opening quote
        int limit = end - 1; // drop the last character -- TODO: it is assumed to be the closing quote
        for (int cursor = pos; cursor < limit; cursor++) {
            char c = buffer.getRow()[cursor];
            if (c == csvConfig.getQuote()) {
                doubleQuote++;
            }
            if (doubleQuote == 2) {
                // Flush everything up to (not including) the second quote, then step over it.
                builder.append(buffer.getRow(), pos, cursor - pos);
                doubleQuote = 0;
                pos = cursor + 1;
            }
        }
        if (pos < limit) {
            builder.append(buffer.getRow(), pos, limit - pos);
        }
    }

    /** Adds the builder's current content to {@code fields} as one field. */
    private void addField(List<String> fields){
        fields.add(builder.toString());
    }

    /**
     * Not supported by this parser.
     *
     * @throws UnsupportedOperationException always
     */
    @Override
    public boolean canParse() {
        throw new UnsupportedOperationException("this method is Unsupported method");
    }
}
Java
복사
package parser2;
import java.util.ArrayList;
import java.util.List;
/**
 * Handles rows that start with the configured comment character by returning the row
 * text as a single element.
 */
public class CommentParser implements CSVParser<List<String>>{
    final InternalBuffer buffer;
    final CSVConfig csvConfig;

    public CommentParser(InternalBuffer buffer, CSVConfig csvConfig) {
        this.buffer = buffer;
        this.csvConfig = csvConfig;
    }

    /**
     * Returns the current row as one string.
     *
     * <p>Only the first {@code rowLimit} characters of the row array are used: the array is
     * reused between rows and over-allocated when it grows, so characters past
     * {@code rowLimit} are padding or leftovers from an earlier row.
     *
     * @return a single-element list holding the row text
     */
    @Override
    public List<String> parse() {
        final List<String> list = new ArrayList<>(1);
        list.add(new String(buffer.getRow(), 0, buffer.getRowLimit()));
        return list;
    }

    /**
     * @return {@code true} when the current row is non-empty and its first character is
     *         the configured comment character
     */
    @Override
    public boolean canParse() {
        final char[] row = buffer.getRow();
        // An unallocated or empty row has no first character to inspect.
        if (row == null || buffer.getRowLimit() <= 0) {
            return false;
        }
        return row[0] == csvConfig.getComment();
    }
}
Java
복사
package parser2;
import lombok.ToString;
import java.util.List;
/**
 * One parsed row: its position in the input, a comment flag, its field values and the
 * number of fields. Records order themselves by {@code order}.
 */
@ToString
public class Record implements Comparable<Record>{
    private final int order;
    private final boolean isComment;
    private final List<String> fields;
    private final int count;

    /**
     * @param order     position of the row in the input
     * @param isComment whether the row is a comment
     * @param fields    field values of the row; its size is stored as the field count
     */
    public Record(int order, boolean isComment, List<String> fields) {
        this.order = order;
        this.isComment = isComment;
        this.fields = fields;
        this.count = fields.size();
    }

    /**
     * Orders records by ascending {@code order}.
     *
     * <p>Uses {@link Integer#compare(int, int)} rather than subtraction, which overflows
     * and reports the wrong sign when the two values are far apart.
     */
    @Override
    public int compareTo(Record o) {
        return Integer.compare(order, o.order);
    }
}
Java
복사