Search

code

package parser2; public interface CSVParser<T> { T parse(); /** * * 해당 메서드는 char[index]가 targetChar와 같을 경우 index를 증분하여 반환 * 만약 매칭 실패시 0을 반환 * @param targetString * @param targetChar * @param index * @return index */ default int incrementIfMatch(char[] targetString, char targetChar, int index) { return targetChar == targetString[index] ? index + 1 : 0; } boolean canParse(); }
Java
복사
package parser2;

import lombok.AccessLevel;
import lombok.Getter;

import java.io.IOException;
import java.io.Reader;
import java.io.UncheckedIOException;
import java.util.Objects;

/**
 * Read buffer shared by the parsers of this package.
 *
 * <p>{@code buf} receives raw characters from the reader; {@code pos} and {@code limit}
 * delimit the part of {@code buf} that has not been copied yet. {@code row} accumulates
 * the characters of the record currently being assembled, and {@code rowLimit} is the
 * number of valid characters in it (the array itself may be longer). Lombok generates
 * getters for every field except {@code reader}.
 */
@Getter
class InternalBuffer {

    @Getter(AccessLevel.NONE)
    private final Reader reader;
    private final char[] buf;
    private char[] row;
    private int pos;
    private int limit;
    private int rowLimit;

    /**
     * @param reader source of characters
     * @param size   capacity of the read buffer
     */
    InternalBuffer(Reader reader, int size) {
        this.reader = reader;
        this.buf = new char[size];
    }

    /** @return {@code true} while {@code pos} is still before {@code limit} */
    boolean notPosAtLimit() {
        return limit > pos;
    }

    /**
     * Rewinds {@code pos} to zero and reads the next chunk into {@code buf}.
     *
     * @return {@code false} once the reader reports end of stream ({@code -1})
     * @throws UncheckedIOException wrapping any {@link IOException} from the reader
     */
    boolean fill() {
        pos = 0;
        try {
            limit = reader.read(buf);
        } catch (IOException e) {
            throw new UncheckedIOException(e);
        }
        return limit != -1;
    }

    /** Overwrites the count of valid characters in {@code row}; {@code 0} starts a new row. */
    public void setRowLimit(int rowLimit) {
        this.rowLimit = rowLimit;
    }

    /** Moves the start of the not-yet-copied region of {@code buf}. */
    void setNewPosition(int newPos) {
        this.pos = newPos;
    }

    /** @return the character stored at {@code cursor} in the read buffer */
    char charAt(int cursor) {
        return buf[cursor];
    }

    /**
     * Copies {@code length} characters starting at {@code pos} from the read buffer to the
     * end of the current row, allocating or growing the row array when required.
     *
     * @param length number of characters to copy
     */
    void mergeOrAppendChars(int length) {
        // First use: the row array is created with exactly the requested size.
        if (Objects.isNull(row)) {
            row = new char[length];
            System.arraycopy(buf, pos, row, 0, length);
            rowLimit = length;
            return;
        }

        final int required = rowLimit + length;

        // Not enough room (an exact fit also counts): move to an array twice as large as
        // the old capacity plus the incoming chunk.
        if (row.length <= required) {
            final char[] grown = new char[(row.length + length) * 2];
            System.arraycopy(row, 0, grown, 0, rowLimit);
            System.arraycopy(buf, pos, grown, rowLimit, length);
            row = grown;
            rowLimit = required;
            return;
        }

        // Enough room: write behind the valid part. For a fresh row rowLimit is 0, so this
        // also covers overwriting from the start.
        System.arraycopy(buf, pos, row, rowLimit, length);
        rowLimit = required;
    }
}
Java
복사
package parser2; import java.io.Reader; import java.util.ArrayList; import java.util.List; public class RecordParser implements CSVParser<List<Record>> { private final InternalBuffer buffer; private final CSVConfig csvConfig; private final CSVParser<List<String>> fieldParser; public RecordParser(Reader reader, int bufferCapacity ,CSVConfig csvConfig) { if (bufferCapacity <= 0) throw new IllegalArgumentException("Buffer capacity cannot be less than 1."); this.buffer = new InternalBuffer(reader, bufferCapacity); this.csvConfig = csvConfig; this.fieldParser = new FieldParser(buffer, new CommentParser(buffer, csvConfig),csvConfig); } @Override public List<Record> parse() { final List<Record> records = new ArrayList<>(); int order = 0; int matchIndex = 0; boolean inQuote = false; while (buffer.fill()) { int limit = buffer.getLimit(); for (int cursor = 0; cursor < limit; cursor++) { char c = buffer.charAt(cursor); if (c == csvConfig.getQuote()) { inQuote = !inQuote; continue; } if (!inQuote && (matchIndex = incrementIfMatch(csvConfig.getLineDelimiter(), c, matchIndex)) == csvConfig.getLineDelimiterLength()) { // buffer 의 길이를 넘어서 matchIndex를 발견할 경우 자르는 위치가 애매해짐 그렇기에 제외해서 넘길 수 없음 // 예를 들면 버퍼의 크기가 1024일때 마지막 (\r)문자열 중 일부만 끝날 경우 뒤의 1024 버퍼에서 matchIndex 만큼 빼버리면 음수가 나옴 int length = (matchIndex == 1 ? cursor + matchIndex : cursor + matchIndex - 1) - buffer.getPos(); buffer.mergeOrAppendChars(length); records.add(new Record(order++, false, fieldParser.parse())); buffer.setNewPosition(cursor + 1); matchIndex = 0; buffer.setRowLimit(0); } } if (buffer.notPosAtLimit()) { buffer.mergeOrAppendChars(buffer.getLimit() - buffer.getPos()); } } if (buffer.getRowLimit() != 0){ records.add(new Record(order, false, fieldParser.parse())); buffer.setRowLimit(0); } return records; } @Override public boolean canParse() { throw new UnsupportedOperationException("this method is Unsupported method"); } }
Java
복사
package parser2; import java.util.ArrayList; import java.util.List; public class FieldParser implements CSVParser<List<String>>{ private final InternalBuffer buffer; private final CSVConfig csvConfig; private final CSVParser<List<String>> commentParser; private StringBuilder builder = new StringBuilder(); public FieldParser(InternalBuffer buffer, CSVParser<List<String>> commentParser, CSVConfig csvConfig) { this.buffer = buffer; this.commentParser = commentParser; this.csvConfig = csvConfig; } @Override public List<String> parse() { if (commentParser.canParse()) return commentParser.parse(); final List<String> fields = new ArrayList<>(); int limit = buffer.getRowLimit() - csvConfig.getFieldSeparatorLength(); int pos = 0; int matchIndex = 0; boolean inQuote = false; boolean isQuoteField = false; for (int cursor = 0; cursor < limit; cursor++) { char c = buffer.getRow()[cursor]; if (c == csvConfig.getQuote()) { inQuote = !inQuote; isQuoteField = true; } else if (!inQuote && (matchIndex = incrementIfMatch(csvConfig.getFieldSeparator(), c, matchIndex)) == csvConfig.getFieldSeparatorLength()) { int length = (matchIndex == 1 ? cursor : cursor - matchIndex + 1) - pos; if (isQuoteField) { quoteFieldParse(pos, length); isQuoteField = false; } else { builder.append(buffer.getRow(), pos, length); } addField(fields); builder.setLength(0); pos = cursor + 1; matchIndex = 0; } } if (isQuoteField) { quoteFieldParse(pos, limit); } else { // 둘다 처리가 된다. 
예를 들어 limit가 5이고 pos가 3일 경우도 처리되고, 둘다 동일한 길이를 가지면 결국 구분자가 마지막에 존재하기 때문에 빈값 추가 builder.append(buffer.getRow(), pos, limit - pos); } addField(fields); builder.setLength(0); return fields; } private void quoteFieldParse(int pos, int length) { int doubleQuote = 0; // 1 이 되면 삭제 pos = pos + 1; // 시작점 찾아야함 근데 TODO 인용부호임을 보장함 int limit = length - 1; // 마지막이 인용부호 인지 확인 -1 TODO 인용부호임을 보장함 for (int cursor = pos; cursor < limit; cursor++) { char c = buffer.getRow()[cursor]; if (c == csvConfig.getQuote()) { doubleQuote++; } if (doubleQuote == 2) { builder.append(buffer.getRow(), pos, cursor - pos); doubleQuote = 0; pos = cursor + 1; } } if (pos < limit) { builder.append(buffer.getRow(), pos, limit - pos); } } private void addField(List<String> fields){ fields.add(builder.toString()); } @Override public boolean canParse() { throw new UnsupportedOperationException("this method is Unsupported method"); } }
Java
복사
package parser2;

import java.util.ArrayList;
import java.util.List;

/**
 * Handles rows whose first character is the configured comment character by returning
 * the row text as a single element.
 */
public class CommentParser implements CSVParser<List<String>> {

    final InternalBuffer buffer;
    final CSVConfig csvConfig;

    /**
     * @param buffer    buffer whose current row is inspected
     * @param csvConfig supplies the comment character
     */
    public CommentParser(InternalBuffer buffer, CSVConfig csvConfig) {
        this.buffer = buffer;
        this.csvConfig = csvConfig;
    }

    /**
     * Returns the current row as one string.
     *
     * <p>Only the first {@code rowLimit} characters are used. The row array is reused
     * between rows and can be longer than its valid content, so converting the whole array
     * appended leftovers of earlier rows to the comment. Whatever the row holds up to
     * {@code rowLimit} is returned as is; nothing is trimmed.
     *
     * @return a list with exactly one element
     */
    @Override
    public List<String> parse() {
        final List<String> list = new ArrayList<>(1);
        list.add(new String(buffer.getRow(), 0, buffer.getRowLimit()));
        return list;
    }

    /**
     * @return {@code true} if the current row is non-empty and starts with the comment
     *         character
     */
    @Override
    public boolean canParse() {
        final char[] row = buffer.getRow();
        // With no valid characters, row[0] would be missing or stale data of a previous row.
        return row != null
                && buffer.getRowLimit() > 0
                && row[0] == csvConfig.getComment();
    }
}
Java
복사
package parser2;

import lombok.ToString;

import java.util.List;

/**
 * One parsed row: its position in the input, a comment flag, its fields and the number of
 * fields at construction time. Records are ordered by {@code order}.
 */
@ToString
public class Record implements Comparable<Record> {

    private final int order;
    private final boolean isComment;
    private final List<String> fields;
    private final int count;

    /**
     * @param order     position of the record in the input
     * @param isComment whether the record is flagged as a comment
     * @param fields    field values; must not be {@code null} because its size is read here
     */
    public Record(int order, boolean isComment, List<String> fields) {
        this.order = order;
        this.isComment = isComment;
        this.fields = fields;
        this.count = fields.size();
    }

    /**
     * Orders records by ascending {@code order}.
     *
     * <p>{@link Integer#compare(int, int)} replaces the former subtraction, which overflows
     * when the operands are far apart (for example {@code Integer.MIN_VALUE - 1}) and then
     * reports the wrong sign.
     */
    @Override
    public int compareTo(Record o) {
        return Integer.compare(order, o.order);
    }
}
Java
복사