View in color | License | Download script | History | Other scripts by: vincentecuye |
30-Mar 5:30 UTC
[0.061] 17.295k
[0.061] 17.295k
codec-csv.r
REBOL [
Name: 'codec-csv
Title: "Codec: Comma Separated Values (CSV)"
Date: 5-Mar-2025
Name: 'codec-csv
Version: 1.0.0
Author: Rights: "Annick ECUYER"
File: %codec-csv.r
History: [
5-Mar-2025 "Annick" {Initial version}
]
Usage: {
To use with %codecs.r, but defines a minimal interface if it's missing.
Returns csv data [string!] :
>> encode 'csv data
arguments:
data : block of lines to encode [block!]
refinements:
/as format [string! char! word! block! object!]
where format can be
a delimiter: ";" (default is ",")
a word: 'tsv (for tab separated values, default is 'csv)
or a block or object: [ ; here with default values
field: "," ; field delimiter
record: "^M^/" ; record/lines delimiter
quote: {"} ; quote used for strings
escape: {"} ; how quotes are escaped in text
]
>> encode/as 'csv data ","
tab-separated value (unquoted, unescaped) can be specified too:
>> encode/as 'csv data 'tsv
Returns csv data as a block of lines (block of blocks). Empty fields are set to none! :
>> decode 'csv data
arguments:
data : data to decode [binary! string! file! url!]
refinements:
/as format [string! char! word! block! object!]
same as for 'encode. The delimiters and quotes are configurable:
>> decode/as 'csv %data.csv [field: #";" quote: #"'"]
>> decode/as 'csv %data.tsv 'tsv
}
Library: [
level: 'intermediate
platform: 'all
type: [module tool codec]
domain: [database file-handling text files]
tested-under: [
view 1.2.1.3.1 on [Windows11]
view 2.7.8.3.1 on [Windows11]
]
support: none
license: 'bsd
see-also: [%codecs.r]
]
]
; Minimal stand-in for the %codecs.r interface. It is only installed when the
; 'codecs component is not reported by COMPONENT? (or COMPONENT? is not defined).
if any [not value? 'component? not component? 'codecs] [
    ; Registry laid out as a flat block of name/object pairs: [name codec name codec ...]
    if not value? 'codecs [codecs: copy []]
    if not value? 'register-codec [
        ; Builds a codec object from BODY and stores it in CODECS under body/name,
        ; replacing any codec already registered under the same name.
        register-codec: func [body /local pos] [
            body: context body
            either pos: find codecs body/name [
                change next pos body
            ] [
                append codecs reduce [body/name body]
            ]
        ]
        ; Calls the ENCODE function of the codec registered as CODEC,
        ; forwarding TYPE through its /as refinement when /as is used.
        encode: func [codec value /as type] [
            either as [codecs/:codec/encode/as value type] [codecs/:codec/encode value]
        ]
        ; Calls the DECODE function of the codec registered as CODEC,
        ; forwarding TYPE through its /as refinement when /as is used.
        decode: func [codec value /as type] [
            either as [codecs/:codec/decode/as value type] [codecs/:codec/decode value]
        ]
        ; Returns the name of the first registered codec whose IDENTIFY function
        ; accepts VALUE, or none when no codec claims it.
        encoding?: func [value [binary!]] [
            foreach [name codec] codecs [
                if all [
                    ; IN yields none for codecs without an IDENTIFY member; such
                    ; codecs must be skipped rather than called.
                    in codec 'identify
                    any-function? get in codec 'identify
                    codec/identify value
                ] [return name]
            ]
            none
        ]
    ]
]
context [
; Prototype format object. ENCODE and DECODE derive the effective format
; from it with MAKE, overriding only the members the caller supplies.
default-format: context [
    field: ","          ; delimiter written/expected between fields
    record: "^M^/"      ; record terminator (CR LF)
    quote: {"}          ; character wrapped around quoted text
    escape: {"}         ; prefix placed before a quote inside quoted text
]
register-codec [
name: 'csv
type: 'text
title: "Comma Separated Values (CSV)"
version: system/script/header/version
suffixes: [%.csv %.tsv]
; Encodes DATA (a block of records) as delimited text and returns the string.
;   data    - block of records; a record that is not a block is treated as a
;             single-field record
;   /as     - FORMAT selects the layout:
;               block or object : overrides members of default-format
;               word            : 'tsv or 'tab gives tab-separated, unquoted output;
;                                 any other word (e.g. 'csv) keeps the defaults
;               anything else   : converted to a string and used as field delimiter
; Dates are written as year-month-day (plus the time when present), numbers are
; formed as is, none becomes an empty field, and every other value is quoted
; with embedded quotes escaped.
encode: func [data [block!] /as format /local tsv result] [
    either all [as not word? format] [
        either any [block? format object? format] [
            format: make default-format format
        ] [
            format: make default-format [field: to-string format]
        ]
    ] [
        tsv: all [as find [tab tsv] format]
        ; Only the tab-separated words change the delimiter; storing any other
        ; word as the delimiter would write the word's name between fields.
        format: make default-format either tsv [[field: "^-"]] [[]]
    ]
    result: copy ""
    foreach line data [
        line: compose [(line)]
        foreach item line [
            append result any [
                all [date? item not none? item/time rejoin [item/year "-" item/month "-" item/day " " item/time]]
                all [date? item rejoin [item/year "-" item/month "-" item/day]]
                all [number? item form item]
                all [none? item ""]
                all [tsv form item]
                rejoin [format/quote (replace/all form item format/quote rejoin [format/escape format/quote]) format/quote]
            ]
            append result format/field
        ]
        ; Drop the delimiter written after the last field: the whole delimiter,
        ; and only when this record actually produced one.
        if not empty? line [
            clear skip tail result negate length? form format/field
        ]
        append result format/record
    ]
    result
]
; Decodes delimited text into a block of records (a block of blocks).
;   data    - the text itself, or a file/url that is READ first; binary input
;             is reinterpreted as a string
;   /as     - FORMAT selects the layout:
;               block or object : overrides members of default-format
;               word            : 'tsv or 'tab selects the tab-separated reader;
;                                 any other word (e.g. 'csv) keeps the defaults
;               char or string  : used as the field delimiter
; Empty fields are returned as none. CRLF, LFCR and CR are normalised to LF
; before parsing.
decode: func [
    data [binary! string! file! url!]
    /as format [char! string! word! block! object!]
    /local
    quote sep escape result line lines item whitespace non-quote-escape non-sep value tsv chunk
] [
    if any [file? data url? data] [data: read data]
    if binary? data [data: as-string data]
    either all [as not word? format] [
        either any [block? format object? format] [
            format: make default-format format
        ] [
            format: make default-format [field: to-string format]
        ]
    ] [
        tsv: all [as find [tab tsv] format]
        ; Only the tab-separated words change the delimiter; any other word
        ; keeps the default instead of becoming the delimiter itself.
        format: make default-format either tsv [[field: "^-"]] [[]]
    ]
    sep: format/field
    escape: format/escape
    quote: format/quote
    ; changes crlf, lfcr & cr to lf
    data: copy data
    replace/all data "^M^/" "^/"
    replace/all data "^/^M" "^/"
    replace/all data "^M" "^/"
    ; Skippable padding around fields. Delimiter characters are excluded so a
    ; tab or space delimiter is never swallowed as padding (which would lose
    ; empty fields).
    whitespace: charset trim/with copy " ^-" form sep
    non-quote-escape: complement charset rejoin ["" quote escape]
    non-sep: complement charset rejoin ["" sep newline]
    either tsv [
        result: copy []
        lines: copy []
        parse/all data [
            any [copy line to "^/" "^/" (append lines line)]
            copy line to end (if line [append lines line])
        ]
        foreach line lines [
            append/only result copy []
            ; An empty line is captured as none; it yields an empty record
            ; and must not be handed to PARSE.
            if line [
                parse/all line [
                    any [copy item to "^-" "^-" (append last result item)]
                    copy item to end (append last result item)
                ]
            ]
        ]
    ] [
        result: copy []
        ; One field: either a quoted field (escaped quotes unfolded) or a run
        ; of characters up to the next delimiter or newline. Text is
        ; accumulated in VALUE.
        item: [
            any whitespace
            [
                quote copy chunk any non-quote-escape (if chunk [append value chunk])
                any [
                    escape copy chunk escape (append value chunk)
                    | escape copy chunk quote (append value chunk)
                    | copy chunk some non-quote-escape (append value chunk)
                ]
                quote
            ]
            any whitespace
            | [any whitespace copy chunk some non-sep (append value chunk)]
        ]
        ; One record: starts a new row, then delimiter-terminated fields
        ; followed by the final field. An empty VALUE is stored as none.
        line: [
            (append/only result copy [])
            any [(value: copy "") opt item sep any whitespace (append last result either empty? value [none] [value])]
            [(value: copy "") [item | to newline] (append last result either empty? value [none] [value])]
        ]
        parse/all data [
            opt #{EFBBBF} ; UTF-8 BOM
            any [
                [any whitespace newline (append/only result copy [])]
                | [any whitespace line any whitespace newline]
            ]
        ]
        ; The LINE rule opens a row before it can fail at end of input;
        ; discard that trailing empty row.
        if empty? last result [remove back tail result]
    ]
    result
]
]
] |