-
Notifications
You must be signed in to change notification settings - Fork 1
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
0 parents
commit 266bea0
Showing
11 changed files
with
307 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,5 @@ | ||
.idea/ | ||
.idea_modules/ | ||
target/ | ||
project/project | ||
project/target |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,14 @@ | ||
// Project coordinates.
name := "ing-parser"

version := "1.0"

scalaVersion := "2.11.7"

// All dependencies declared in one place; scalatest is limited to the test configuration.
libraryDependencies ++= Seq(
  "joda-time" % "joda-time" % "1.6.2",
  "org.scalatest" %% "scalatest" % "2.2.6" % "test",
  "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.4"
)
|
||
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,46 @@ | ||
package ro.dvulpe.ingparser | ||
|
||
import scala.util.parsing.combinator._ | ||
import scala.util.parsing.input.Reader | ||
|
||
|
||
/**
 * Parser-combinator grammar for comma-separated text.
 *
 * A field is either quoted (may contain commas, line breaks and `""` as an
 * escaped quote) or unquoted (any run of characters other than `"`, `,`,
 * `\r`, `\n`, possibly empty). Records are fields separated by commas and a
 * file is records separated by line breaks.
 */
trait CSVParser extends RegexParsers {
  // Spaces are meaningful inside CSV fields, so every automatic
  // whitespace-skipping hook of RegexParsers is switched off.
  override val skipWhitespace = false

  override val whiteSpace = "".r

  override protected def handleWhiteSpace(source: CharSequence, offset: Int): Int =
    offset

  val COMMA = ","
  val DQUOTE = "\""
  // Two consecutive quotes inside a quoted field stand for one literal quote.
  val DQUOTE_ESC = "\"\"" ^^^ "\""

  val CRLF = "\r\n" | "\n"
  // Single plain character; kept unchanged for existing users of this member.
  val TXT = "[^\",\r\n]".r
  val SPACES = "[ \t]+".r

  // A whole run of plain characters, so the grammar does not allocate one
  // String per input character.
  private val textRun = "[^\",\r\n]+".r

  /** Quoted field; spaces around the quotes are dropped, the content is kept verbatim. */
  val escaped: Parser[String] =
    (opt(SPACES) ~> DQUOTE ~> rep(COMMA | CRLF | textRun | DQUOTE_ESC) <~ DQUOTE <~ opt(SPACES)) ^^ (_.mkString)

  /** Unquoted field; succeeds with the empty string when no plain character is present. */
  val nonEscaped: Parser[String] = rep(textRun) ^^ (_.mkString)

  val field: Parser[String] = escaped | nonEscaped

  /** One line: always at least one (possibly empty) field. */
  val record: Parser[List[String]] = repsep(field, COMMA)

  /**
   * All records of the input.
   *
   * `record` also matches empty input, so a line break at the very end of
   * the input produces one extra record holding a single empty field. That
   * artefact is removed here so a trailing line break does not add a record.
   */
  val file: Parser[List[List[String]]] = repsep(record, CRLF) ^^ { rows =>
    if (rows.lastOption.contains(List(""))) rows.init else rows
  }

  /**
   * Parses the complete input as CSV.
   *
   * @param s character reader positioned at the start of the data
   * @return the records, each a list of field values
   * @throws Exception carrying the parser's failure description when the input is not fully consumed
   */
  def parse(s: Reader[Char]): List[List[String]] = parseAll(file, s) match {
    case Success(res, _) => res
    case failure: NoSuccess => throw new Exception(failure.toString)
  }
}

object CSVParser extends CSVParser
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
package ro.dvulpe.ingparser | ||
|
||
import org.joda.time.DateTime | ||
|
||
import scala.util.Try | ||
import scala.util.parsing.combinator._ | ||
|
||
/**
 * First line of a statement entry: the date, a description and the two
 * optional amount columns.
 */
case class Summary(date: DateTime, details: String, debit: Option[BigDecimal], credit: Option[BigDecimal]) {

  /** The four columns in declaration order; an absent amount becomes an empty string. */
  def asList = {
    val debitCell = debit.getOrElse("")
    val creditCell = credit.getOrElse("")
    date :: details :: debitCell :: creditCell :: Nil
  }

  /** One-line rendering; the amounts are printed as `Option` values (`Some(..)` / `None`). */
  def asString = {
    val day = date.toLocalDate.toString
    s"$day - $details, Debit: $debit, Credit: $credit"
  }
}
|
||
/**
 * A summary line together with the detail lines that follow it.
 *
 * @param summary the parsed summary line
 * @param details the text of each detail line, in input order
 */
case class IngRecord(summary: Summary, details: Seq[String]) {

  /**
   * Multi-line rendering: the summary on the first line, then every detail
   * on its own tab-indented line. Without details only the summary is returned.
   */
  def asString: String =
    // The summary is joined as a line of its own; previously the first detail
    // was glued to the end of the summary line.
    (summary.asString +: details.map("\t" + _)).mkString("\n")
}
|
||
/**
 * Grammar for the statement export, built on the CSV field rules of [[CSVParser]].
 *
 * An entry is a summary line (date, description, two optional amounts)
 * followed by zero or more detail lines, each starting with a comma.
 */
object INGParser extends RegexParsers with CSVParser {

  /**
   * Lifts a conversion that may throw into a parser that consumes no input:
   * it succeeds with the converted value or fails with the exception's text.
   */
  private def attempt[T](convert: => T): Parser[T] = Parser[T] { in =>
    Try(convert) match {
      case scala.util.Success(value) => Success(value, in)
      // toString rather than getMessage: getMessage may be null.
      case scala.util.Failure(ex) => Failure(ex.toString, in)
    }
  }

  /** Parser that yields `data` converted with `asDateTime`, failing when the conversion throws. */
  def asDate(data: String): Parser[DateTime] = attempt(data.asDateTime)

  val date: Parser[DateTime] = field >> asDate

  /** Parser that yields `data` converted with `asDecimal`, failing when the conversion throws. */
  def asDecimal(data: String): Parser[BigDecimal] = attempt(data.asDecimal)

  val decimal: Parser[BigDecimal] = field >> asDecimal

  /** A detail line: leading comma, the detail text, then any remaining fields, which are discarded. */
  val transDetail: Parser[String] = COMMA ~> field <~ repsep(field, COMMA)

  val transDetails: Parser[List[String]] = rep(transDetail <~ opt(CRLF))

  /** Summary line: date, description, then the two optional amount columns in `Summary` order. */
  val summaryLine: Parser[Summary] =
    (date <~ COMMA) ~ (field <~ COMMA) ~ (opt(decimal) <~ COMMA) ~ opt(decimal) ^^ {
      case transactionDate ~ details ~ debit ~ credit =>
        Summary(transactionDate, details, debit, credit)
    }

  /** A summary line, its mandatory line break, and the detail lines (possibly none). */
  val ingRecord: Parser[IngRecord] =
    (summaryLine <~ CRLF) ~ transDetails ^^ {
      case summary ~ trans => IngRecord(summary, trans)
    }

  /** Header line: the literal `Data`, the remaining column titles, and the line break. */
  val header: Parser[String] = ("Data" ~> rep1sep(field, COMMA)) ~> CRLF

  val ingRecords: Parser[List[IngRecord]] = opt(header) ~> rep(ingRecord <~ opt(CRLF))

  /**
   * Parses a whole export, with or without the header line.
   *
   * @throws Exception carrying the parser's failure description when the input does not match
   */
  def parseRecords(s: String): List[IngRecord] = parseTest(s, ingRecords)

  /**
   * Runs `parser` over the complete input `in`.
   *
   * @return the parser's result when all of `in` is consumed
   * @throws Exception carrying the parser's failure description otherwise
   */
  def parseTest[T](in: String, parser: Parser[T]): T =
    parseAll(parser, in) match {
      case Success(res, _) => res
      case failure: NoSuccess => throw new Exception(failure.toString)
    }
}
20 changes: 20 additions & 0 deletions
20
src/main/scala/ro/dvulpe/ingparser/ParenthesisParser.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,20 @@ | ||
package ro.dvulpe.ingparser | ||
|
||
import scala.util.parsing.combinator.RegexParsers | ||
|
||
/**
 * Recognises sequences of balanced, possibly nested, parenthesised groups
 * such as `(a(b))(c)`.
 */
object ParenthesisParser extends RegexParsers {

  // A val, so the pattern is compiled once instead of on every evaluation.
  val term: scala.util.matching.Regex = "[^\\(\\)]+".r

  /** One group: `(`, any mix of plain text and nested groups, `)`. */
  val expr: Parser[String] = "(" ~ rep(term | expr) ~ ")" ^^ {
    case open ~ l ~ closed => s"$open $l $closed"
  }

  val allExpr: Parser[List[String]] = rep(expr)

  /** Parses the complete input and returns the raw parse result. */
  def apply(input: String): ParseResult[List[String]] = parseAll(allExpr, input)

  /** True when the complete input is a sequence of balanced groups. */
  def isValid(input: String): Boolean = parseAll(allExpr, input).successful
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,41 @@ | ||
package ro.dvulpe.ingparser | ||
|
||
import scala.io.Source | ||
import org.joda.time.DateTime | ||
import org.joda.time.format.DateTimeFormat | ||
import java.util.Locale | ||
|
||
/**
 * Command-line entry point: reads a statement export, parses it, sorts the
 * entries by date and prints them together with the elapsed time.
 *
 * The file to read is the first command-line argument; without arguments the
 * original hard-coded location is used.
 */
object Parser extends App {

  private val defaultPath = "/Users/dan/Downloads/Tranzactii_pe_perioada-5.csv"

  // Entire file content; the underlying handle is closed even if reading fails.
  val source: String = {
    val handle = Source.fromFile(args.headOption.getOrElse(defaultPath))
    try handle.mkString("") finally handle.close()
  }

  // Not used by the code below; kept so existing references to it keep compiling.
  implicit def stringToDate(input: String): DateTime =
    DateTimeFormat.forPattern("dd MMMMM yyyy").withLocale(new Locale("RO")).parseDateTime(input)

  // Timing covers parsing and sorting, not reading the file.
  val start = System.currentTimeMillis()

  implicit val dateOrdering: Ordering[DateTime] = new Ordering[DateTime] {
    def compare(x: DateTime, y: DateTime): Int =
      x.compareTo(y)
  }

  val records: List[IngRecord] = INGParser.parseRecords(source).sortBy(_.summary.date)
  val duration = System.currentTimeMillis() - start
  println(records.size)
  records.foreach(r => println(r.asString))
  println(s"total duration ${duration}ms")

  //  val output = records.map {
  //    rec => (rec.summary.asList ++ List(rec.details.mkString(" "))).map(f => f).mkString(",")
  //  }.mkString("\n")
  //  println(output)
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package ro.dvulpe.ingparser | ||
|
||
import org.joda.time.DateTime | ||
|
||
/**
 * Flat transaction record.
 *
 * NOTE(review): nothing in the visible sources constructs or reads this
 * class, so the meaning of each field is inferred from its name only.
 */
case class Transaction(
  id: String,
  date: DateTime,
  notes: String,
  amount: BigDecimal,
  payee: String,
  accountRef: Option[String],
  bank: Option[String],
  reference: String
)
|
||
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,18 @@ | ||
package ro.dvulpe | ||
|
||
import org.joda.time.format.DateTimeFormat | ||
import java.util.Locale | ||
import java.text.{NumberFormat, DecimalFormat} | ||
|
||
package object ingparser {
  import scala.language.implicitConversions

  private val romanian = new Locale("RO")

  // Built once and reused by every asDateTime call.
  private val dateFormat = DateTimeFormat.forPattern("dd MMMMM yyyy").withLocale(romanian)

  /**
   * String conversions used by the parsers. A named class rather than an
   * anonymous structural type, so the methods are called directly instead of
   * through reflection.
   */
  final class RichString(input: String) {

    /**
     * Parses the string with the `dd MMMMM yyyy` pattern and the `RO` locale.
     * Throws whatever the formatter throws when the text does not match.
     */
    def asDateTime: org.joda.time.DateTime = dateFormat.parseDateTime(input)

    /**
     * Parses the string as a number using the `RO` locale's format.
     * Throws `java.text.ParseException` when no number can be read.
     */
    def asDecimal: BigDecimal = {
      // A new format per call: the instance is mutated below before parsing.
      val format: DecimalFormat = NumberFormat.getInstance(romanian).asInstanceOf[DecimalFormat]
      format.setParseBigDecimal(true)
      BigDecimal.apply(format.parse(input).asInstanceOf[java.math.BigDecimal])
    }
  }

  /** Makes `asDateTime` and `asDecimal` available on any `String` in this package. */
  implicit def richString(input: String): RichString = new RichString(input)

}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,9 @@ | ||
package ro.dvulpe.main | ||
|
||
//import scala.util.App | ||
|
||
/** Placeholder entry point; the body contains no executable statements. */
object MainClass extends App {
  //  val input = read
}
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,59 @@ | ||
package ro.dvulpe.ingparser | ||
|
||
import org.scalatest.FunSuite | ||
import org.joda.time.DateTime | ||
|
||
/** Exercises the individual INGParser rules and the whole-export entry point. */
class INGParserTest extends FunSuite {

  // Built in the JVM's default time zone, like the parser's own result, so the
  // comparison does not depend on the machine running in a +03:00 zone.
  private val transactionDay = new DateTime(2013, 9, 30, 0, 0, 0, 0)

  private val expectedSummary =
    Summary(transactionDay, "Plata debit direct", Some(BigDecimal("120.19")), None)

  test("transaction detail line should be parsed") {
    val input = ",Beneficiar: RCS AND RDS SA,,"
    val result = INGParser.parseTest(input, INGParser.transDetail)
    assert(result === "Beneficiar: RCS AND RDS SA")
  }

  test("transaction details lines should be parsed") {
    val input = ",Beneficiar: RCS AND RDS SA,,\n,Banca: INGB CENTRALA,,\n,Referinta: 70979872,,"
    val result = INGParser.parseTest(input, INGParser.transDetails)
    assert(result === List("Beneficiar: RCS AND RDS SA", "Banca: INGB CENTRALA", "Referinta: 70979872"))
  }

  test("transaction summary line should be parsed") {
    val input = "30 septembrie 2013,Plata debit direct,\"120,19\","
    val result = INGParser.parseTest(input, INGParser.summaryLine)
    assert(result === expectedSummary)
  }

  test("transaction with details should be parsed") {
    val input = "30 septembrie 2013,Plata debit direct,\"120,19\",\n,Beneficiar: RCS AND RDS SA,,\n,Banca: INGB CENTRALA,,\n,Referinta: 70979872,,"
    val result = INGParser.parseTest(input, INGParser.ingRecord)
    assert(result.summary === expectedSummary)
    assert(result.details === List("Beneficiar: RCS AND RDS SA", "Banca: INGB CENTRALA", "Referinta: 70979872"))
  }

  test("transaction without details should be parsed") {
    val input = "30 septembrie 2013,Plata debit direct,\"120,19\",\n"
    val result = INGParser.parseTest(input, INGParser.ingRecord)
    assert(result.summary === expectedSummary)
    assert(result.details.isEmpty)
  }

  test("two consecutive transactions should be parsed") {
    val input = "30 septembrie 2013,Plata debit direct,\"120,19\",\n30 septembrie 2013,Plata debit direct,\"120,19\",\n"
    val result = INGParser.parseRecords(input)
    assert(result.size === 2)
  }

  test("header should be matched") {
    val input = "Data,Detalii tranzactie,Debit,Credit\n"
    val result = INGParser.parseTest(input, INGParser.header)
    // The header rule yields the line break that terminates it.
    assert(result === "\n")
  }

  test("transaction fiel with header should be matched") {
    val input = "Data,Detalii tranzactie,Debit,Credit\n30 septembrie 2013,Plata debit direct,\"120,19\",\n30 septembrie 2013,Plata debit direct,\"120,19\",\n"
    val result = INGParser.parseRecords(input)
    assert(result.size === 2)
  }

}
13 changes: 13 additions & 0 deletions
13
src/test/scala/ro/dvulpe/ingparser/ParenthesisParserTest.scala
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,13 @@ | ||
package ro.dvulpe.ingparser | ||
|
||
import org.scalatest.{FunSuite, Matchers} | ||
|
||
/** Checks ParenthesisParser.isValid on one balanced and one unbalanced input. */
class ParenthesisParserTest extends FunSuite with Matchers {

  test("balanced parentheses") {
    val verdict = ParenthesisParser.isValid("()")
    verdict shouldBe true
  }

  test("unbalanced parentheses") {
    val verdict = ParenthesisParser.isValid(")((test)test(test2)))")
    verdict shouldBe false
  }

}