Skip to content

Commit

Permalink
ING Parser initial commit
Browse files Browse the repository at this point in the history
  • Loading branch information
dvulpe committed Jan 12, 2016
0 parents commit 266bea0
Show file tree
Hide file tree
Showing 11 changed files with 307 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
.idea/
.idea_modules/
target/
project/project
project/target
14 changes: 14 additions & 0 deletions build.sbt
Original file line number Diff line number Diff line change
@@ -0,0 +1,14 @@
// sbt build definition for the ing-parser project.
name := "ing-parser"

version := "1.0"

scalaVersion := "2.11.7"

// Compile-scope libraries first, the test-only library last.
libraryDependencies ++= Seq(
  "joda-time" % "joda-time" % "1.6.2",
  "org.scala-lang.modules" %% "scala-parser-combinators" % "1.0.4",
  "org.scalatest" %% "scalatest" % "2.2.6" % "test"
)



46 changes: 46 additions & 0 deletions src/main/scala/ro/dvulpe/ingparser/CSVParser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
package ro.dvulpe.ingparser

import scala.util.parsing.combinator._
import scala.util.parsing.input.Reader


/**
 * Combinator grammar for comma-separated text.
 *
 * A field is either bare or wrapped in double quotes. Inside quotes a comma, a line break and
 * a doubled quote (`""`, read as one `"`) all belong to the field value. Spaces and tabs
 * directly around a quoted field are dropped; every other blank is kept.
 */
trait CSVParser extends RegexParsers {
  // Blanks carry meaning in CSV, so all whitespace-skipping hooks of RegexParsers are disabled.
  override val skipWhitespace = false

  override val whiteSpace = "".r

  override protected def handleWhiteSpace(source: CharSequence, offset: Int): Int =
    offset

  val COMMA = ","
  val DQUOTE = "\""
  // A doubled quote inside a quoted field yields a single literal quote.
  val DQUOTE_ESC = "\"\"" ^^^ "\""

  val CRLF = "\r\n" | "\n"
  // Exactly one character that is neither a quote, a comma nor a line break.
  val TXT = "[^\",\r\n]".r
  val SPACES = "[ \t]+".r

  /** Quoted field; yields the content between the quotes with `""` collapsed to `"`. */
  val escaped = {
    (opt(SPACES) ~> DQUOTE ~> rep(COMMA | CRLF | TXT | DQUOTE_ESC) <~ DQUOTE <~ opt(SPACES)) ^^ {
      chunks => chunks.mkString("")
    }
  }

  /** Bare field: a possibly empty run of TXT characters. Never fails. */
  val nonEscaped = rep(TXT) ^^ {
    chars => chars.mkString
  }

  val field = escaped | nonEscaped

  /** One line: fields separated by commas. Matches the empty string as `List("")`. */
  val record = repsep(field, COMMA)

  /**
   * Whole input: records separated by line breaks.
   *
   * Because `record` also matches the empty string, a line break at the very end of the input
   * is consumed as a separator followed by an empty record. That artificial trailing record is
   * removed here, so `"a,b\n"` yields one record and empty input yields none.
   */
  val file = (repsep(record, CRLF) <~ opt(CRLF)) ^^ withoutTrailingBlankRecord

  /** Drops a final record that consists of a single empty field; other records are untouched. */
  private def withoutTrailingBlankRecord(records: List[List[String]]): List[List[String]] =
    records.lastOption match {
      case Some(List("")) => records.init
      case _ => records
    }

  /**
   * Parses the complete input as CSV.
   *
   * @param s character reader positioned at the start of the CSV text
   * @return one list of field values per record
   * @throws Exception carrying the parser's failure description when the input does not match
   */
  def parse(s: Reader[Char]): List[List[String]] = parseAll(file, s) match {
    case Success(res, _) => res
    case e => throw new Exception(e.toString)
  }
}

/** Ready-to-use instance of the [[CSVParser]] grammar. */
object CSVParser extends CSVParser
73 changes: 73 additions & 0 deletions src/main/scala/ro/dvulpe/ingparser/INGParser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,73 @@
package ro.dvulpe.ingparser

import org.joda.time.DateTime

import scala.util.Try
import scala.util.parsing.combinator._

/**
 * Summary line of a parsed record: a date, a free-text description and the optional
 * debit and credit amounts.
 */
case class Summary(date: DateTime, details: String, debit: Option[BigDecimal], credit: Option[BigDecimal]) {

  /** The four values as a list; an absent amount is represented by an empty string. */
  def asList =
    date :: details :: debit.getOrElse("") :: credit.getOrElse("") :: Nil

  /** One-line rendering that starts with the local date part of `date`. */
  def asString = {
    val day = date.toLocalDate
    s"$day - $details, Debit: $debit, Credit: $credit"
  }
}

/**
 * A parsed record: its summary line plus the detail lines that follow it.
 *
 * @param summary the summary line of the record
 * @param details text of each detail line, in input order
 */
case class IngRecord(summary: Summary, details: Seq[String]) {

  /**
   * Multi-line rendering: the summary on the first line, then every detail on a line of its
   * own, prefixed with a tab. A record without details renders as the summary alone.
   */
  def asString: String =
    // Joining summary and details through one mkString puts a line break before the first
    // detail as well; plain concatenation glued it onto the summary line.
    (summary.asString +: details.map("\t" + _)).mkString("\n")
}

/**
 * Grammar for the statement layout exercised by this project: an optional header line, then
 * records made of one summary line (date, details, debit, credit) followed by any number of
 * detail lines that start with a comma.
 */
object INGParser extends RegexParsers with CSVParser {

  /**
   * Lifts a conversion that may throw into a parser that consumes no input: it succeeds with
   * the converted value, or fails with the exception's `toString` (never a null message).
   */
  private def converted[T](value: => T): Parser[T] = Parser {
    in: Input =>
      Try(value) match {
        case scala.util.Success(result) => Success(result, in)
        case scala.util.Failure(ex) => Failure(ex.toString, in)
      }
  }

  /** Parser that succeeds with `data` read as a date, without consuming input. */
  def asDate(data: String): Parser[DateTime] = converted(data.asDateTime)

  val date: Parser[DateTime] = field >> asDate

  /** Parser that succeeds with `data` read as a decimal number, without consuming input. */
  def asDecimal(data: String): Parser[BigDecimal] = converted(data.asDecimal)

  val decimal: Parser[BigDecimal] = field >> asDecimal

  // Detail line: a leading comma, the detail text, then the remaining fields, which are discarded.
  val transDetail: Parser[String] = COMMA ~> field <~ repsep(field, COMMA)

  val transDetails: Parser[List[String]] = rep(transDetail <~ opt(CRLF))

  // Column order is date, details, debit, credit; an amount that does not parse becomes None.
  val summaryLine: Parser[Summary] =
    (date <~ COMMA) ~ (field <~ COMMA) ~ (opt(decimal) <~ COMMA) ~ opt(decimal) ^^ {
      case transactionDate ~ details ~ debit ~ credit =>
        Summary(transactionDate, details, debit, credit)
    }

  val ingRecord: Parser[IngRecord] =
    (summaryLine <~ CRLF) ~ opt(transDetails) ^^ {
      case summary ~ trans =>
        IngRecord(summary, trans.getOrElse(Nil))
    }

  // Header line: must start with the literal "Data"; its column titles are discarded.
  val header: Parser[String] = ("Data" ~> rep1sep(field, COMMA)) ~> CRLF

  val ingRecords: Parser[List[IngRecord]] = opt(header) ~> rep(ingRecord <~ opt(CRLF))

  /**
   * Parses a whole statement.
   *
   * @param s complete statement text, with or without the header line
   * @return the records in input order
   * @throws Exception carrying the parser's failure description when the text does not match
   */
  def parseRecords(s: String): List[IngRecord] = parseTest(s, ingRecords)

  /**
   * Runs `parser` over the whole of `in`.
   *
   * @param in     text that must be matched completely
   * @param parser grammar rule to apply
   * @return the value produced by `parser`
   * @throws Exception carrying the parser's failure description when the text does not match
   */
  def parseTest[T](in: String, parser: Parser[T]): T =
    parseAll(parser, in) match {
      case Success(res, _) => res
      case e => throw new Exception(e.toString)
    }
}
20 changes: 20 additions & 0 deletions src/main/scala/ro/dvulpe/ingparser/ParenthesisParser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
package ro.dvulpe.ingparser

import scala.util.parsing.combinator.RegexParsers

/**
 * Recognises sequences of balanced, possibly nested, parenthesised groups.
 */
object ParenthesisParser extends RegexParsers {

  /** A non-empty run of characters containing no parenthesis. */
  def term = "[^\\(\\)]+".r

  // One group: "(", any mix of plain text and nested groups, ")". The self-reference sits in a
  // by-name argument, so it is only evaluated while parsing.
  val expr: Parser[String] = "(" ~ rep(term | expr) ~ ")" ^^ {
    case lparen ~ inner ~ rparen => s"$lparen $inner $rparen"
  }

  lazy val allExpr: Parser[List[String]] = rep(expr)

  /** Parses the whole of `input` as a sequence of groups and returns the raw parse result. */
  def apply(input: String) = parseAll(allExpr, input)

  /** True exactly when the whole of `input` parses as a sequence of groups. */
  def isValid(input: String): Boolean =
    parseAll(allExpr, input).successful
}
41 changes: 41 additions & 0 deletions src/main/scala/ro/dvulpe/ingparser/Parser.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
package ro.dvulpe.ingparser

import scala.io.Source
import org.joda.time.DateTime
import org.joda.time.format.DateTimeFormat
import java.util.Locale

/**
 * Command-line driver: reads a statement file, parses it with [[INGParser]], prints the number
 * of records and each record sorted by date, then reports how long parsing and sorting took.
 *
 * The file is taken from the first command-line argument; without arguments the original
 * hard-coded location is used.
 */
object Parser extends App {

  private val DefaultPath = "/Users/dan/Downloads/Tranzactii_pe_perioada-5.csv"

  private val path = args.headOption.getOrElse(DefaultPath)

  // Read the whole file, then release the handle even if reading fails.
  val source = {
    val file = Source.fromFile(path)
    try file.mkString finally file.close()
  }

  // Not used by this driver; kept because it is a public member of the object.
  implicit def stringToDate(input: String): DateTime =
    DateTimeFormat.forPattern("dd MMMMM yyyy").withLocale(new Locale("RO")).parseDateTime(input)

  // The clock starts after the file is in memory, so only parsing and sorting are measured.
  val start = System.currentTimeMillis()

  implicit val dateOrdering: Ordering[DateTime] = new Ordering[DateTime] {
    def compare(x: DateTime, y: DateTime): Int =
      x.compareTo(y)
  }

  val records: List[IngRecord] = INGParser.parseRecords(source).sortBy(_.summary.date)
  val duration = System.currentTimeMillis() - start
  println(records.size)
  records.foreach(r => println(r.asString))
  println(s"total duration ${duration}ms")

  // CSV re-export, currently disabled:
  // val output = records.map {
  //   rec => (rec.summary.asList ++ List(rec.details.mkString(" "))).map(f => f).mkString(",")
  // }.mkString("\n")
  // println(output)
}
9 changes: 9 additions & 0 deletions src/main/scala/ro/dvulpe/ingparser/Transaction.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package ro.dvulpe.ingparser

import org.joda.time.DateTime

/**
 * Flat transaction value with an identifier, date, notes, amount, payee, an optional account
 * reference, an optional bank and a reference string.
 */
case class Transaction(
  id: String,
  date: DateTime,
  notes: String,
  amount: BigDecimal,
  payee: String,
  accountRef: Option[String],
  bank: Option[String],
  reference: String
)


18 changes: 18 additions & 0 deletions src/main/scala/ro/dvulpe/ingparser/package.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
package ro.dvulpe

import org.joda.time.format.DateTimeFormat
import java.util.Locale
import java.text.{NumberFormat, DecimalFormat}

/**
 * Package-wide string helpers for reading dates and decimal numbers written with Romanian
 * locale conventions.
 */
package object ingparser {

  private val RomanianLocale = new Locale("RO")

  // Built once and shared by every call instead of being rebuilt per field.
  private val DateFormat: org.joda.time.format.DateTimeFormatter =
    DateTimeFormat.forPattern("dd MMMMM yyyy").withLocale(RomanianLocale)

  /**
   * Conversions available on any `String` through [[richString]].
   *
   * A named class replaces the former anonymous structural type, so the methods are called
   * directly rather than through runtime reflection.
   */
  final class RichString(input: String) {

    /**
     * Reads the string as a date in the form `dd MMMMM yyyy` with Romanian month names.
     * Exceptions raised by the formatter for non-matching text propagate to the caller.
     */
    def asDateTime: org.joda.time.DateTime = DateFormat.parseDateTime(input)

    /**
     * Reads the string as a decimal number using Romanian number symbols.
     * A `java.text.ParseException` from the formatter propagates when no number can be read.
     */
    def asDecimal: BigDecimal = {
      // A fresh formatter per call: it is reconfigured below, so it is not shared.
      val format: DecimalFormat = NumberFormat.getInstance(RomanianLocale).asInstanceOf[DecimalFormat]
      format.setParseBigDecimal(true)
      BigDecimal.apply(format.parse(input).asInstanceOf[java.math.BigDecimal])
    }
  }

  /** Implicit view that adds `asDateTime` and `asDecimal` to `String`. */
  implicit def richString(input: String): RichString = new RichString(input)

}
9 changes: 9 additions & 0 deletions src/main/scala/ro/dvulpe/main/MainClass.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,9 @@
package ro.dvulpe.main

//import scala.util.App

/**
 * Placeholder application entry point; its body contains no executable statements yet.
 */
object MainClass extends App {

// val input = read

}
59 changes: 59 additions & 0 deletions src/test/scala/ro/dvulpe/ingparser/INGParserTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,59 @@
package ro.dvulpe.ingparser

import org.scalatest.FunSuite
import org.joda.time.DateTime

/**
 * Exercises the individual grammar rules of [[INGParser]] and the whole-statement entry point.
 */
class INGParserTest extends FunSuite {

  // Midnight in the JVM's default time zone, which is what the parser produces for a bare date.
  // Pinning a fixed UTC offset here would make the comparison depend on where the test runs.
  private val sept30 = new DateTime(2013, 9, 30, 0, 0, 0, 0)

  private val summaryText = "30 septembrie 2013,Plata debit direct,\"120,19\","

  private val detailTexts = List("Beneficiar: RCS AND RDS SA", "Banca: INGB CENTRALA", "Referinta: 70979872")

  test("transaction detail line should be parsed") {
    val input = ",Beneficiar: RCS AND RDS SA,,"
    val result = INGParser.parseTest(input, INGParser.transDetail)
    assert(result === "Beneficiar: RCS AND RDS SA")
  }

  test("transaction details lines should be parsed") {
    val input = ",Beneficiar: RCS AND RDS SA,,\n,Banca: INGB CENTRALA,,\n,Referinta: 70979872,,"
    val result = INGParser.parseTest(input, INGParser.transDetails)
    assert(result === detailTexts)
  }

  test("transaction summary line should be parsed") {
    val result = INGParser.parseTest(summaryText, INGParser.summaryLine)
    assert(result === Summary(sept30, "Plata debit direct", Some(BigDecimal("120.19")), None))
  }

  test("transaction with details should be parsed") {
    val input = summaryText + "\n,Beneficiar: RCS AND RDS SA,,\n,Banca: INGB CENTRALA,,\n,Referinta: 70979872,,"
    val result = INGParser.parseTest(input, INGParser.ingRecord)
    assert(result.summary.details === "Plata debit direct")
    assert(result.details === detailTexts)
  }

  test("transaction without details should be parsed") {
    val input = summaryText + "\n"
    val result = INGParser.parseTest(input, INGParser.ingRecord)
    assert(result.summary.details === "Plata debit direct")
    assert(result.details.isEmpty)
  }

  test("two consecutive transactions should be parsed") {
    val input = summaryText + "\n" + summaryText + "\n"
    val result = INGParser.parseRecords(input)
    assert(result.size === 2)
  }

  test("header should be matched") {
    val input = "Data,Detalii tranzactie,Debit,Credit\n"
    // The header rule discards the column titles and yields the closing line break.
    val result = INGParser.parseTest(input, INGParser.header)
    assert(result === "\n")
  }

  test("transaction file with header should be matched") {
    val input = "Data,Detalii tranzactie,Debit,Credit\n" + summaryText + "\n" + summaryText + "\n"
    val result = INGParser.parseRecords(input)
    assert(result.size === 2)
  }

}
13 changes: 13 additions & 0 deletions src/test/scala/ro/dvulpe/ingparser/ParenthesisParserTest.scala
Original file line number Diff line number Diff line change
@@ -0,0 +1,13 @@
package ro.dvulpe.ingparser

import org.scalatest.{FunSuite, Matchers}

/**
 * Checks that [[ParenthesisParser]] accepts balanced input and rejects unbalanced input.
 */
class ParenthesisParserTest extends FunSuite with Matchers {

  test("balanced parentheses") {
    val accepted = ParenthesisParser.isValid("()")
    accepted shouldBe true
  }

  test("unbalanced parentheses") {
    val accepted = ParenthesisParser.isValid(")((test)test(test2)))")
    accepted shouldBe false
  }

}

0 comments on commit 266bea0

Please sign in to comment.