Script Library: 1247 scripts
  • Home
  • Script library
  • AltME Archive
  • Mailing list
  • Articles Index
  • Site search
 

Archive version of: codec-csv.r ... version: 1 ... vincentecuye 5-Mar

Amendment note: new script || Publicly available? Yes

REBOL [
    Name: 'codec-csv
	Title: "Codec: Comma Separated Values (CSV)"
    Date: 5-Mar-2025
	Name: 'codec-csv
	Version: 1.0.0
	Author: Rights: "Annick ECUYER"
    File: %codec-csv.r
	History: [
        5-Mar-2025 "Annick" {Initial version}
	]
	Usage: {
        To use with %codecs.r, but defines a minimal interface if it's missing.

        Returns csv data [string!] :

            >> encode 'csv data

            arguments:
                data : block of lines to encode [block!]

            refinements: 

            /as format [string! char! word! block! object!]

            where format can be 
                a delimiter: ";" (default is ",")
                a word: 'tsv (for tab separated values, default is 'csv)
                or a block or object: [ ; here with default values
                    field: ","     ; field delimiter
                    record: "^M^/" ; record/lines delimiter
                    quote: {"}     ; quote used for strings
                    escape: {"}    ; how quotes are escaped in text
                ] 

            >> encode/as 'csv data ","

            tab-separated value (unquoted, unescaped) can be specified too: 

            >> encode/as 'csv data 'tsv

        Returns csv data as a block of lines (block of blocks). Empty fields are set to none! :

            >> decode 'csv data

            arguments:
                data : data to decode [binary! string! file! url!]

            refinements:

            /as format [string! char! word! block! object!]

            As with 'encode, the delimiters and quotes are configurable:

            >> decode/as 'csv %data.csv [field: #";" quote: #"'"]
            >> decode/as 'csv %data.tsv 'tsv
    }
    Library: 
        level: 'intermediate
        platform: 'all
        type: [module tool codec]
        domain: [database file-handling text files]
        tested-under: [
        	view 1.2.1.3.1 on [Windows11]
        	view 2.7.8.3.1 on [Windows11]
        ]
        support: none
        license: 'bsd
        see-also: [%codecs.r]
]

; Minimal stand-in for the codecs interface. It is installed only when
; `component?` is undefined or reports that 'codecs is not present, and it
; defines only what is still missing.
if any [not value? 'component? not component? 'codecs] [
    ; CODECS is a flat block of name/object pairs: [name codec name codec ...]
    if not value? 'codecs [codecs: copy []]
    if not value? 'register-codec [
        ; Builds a codec object from BODY and stores it in CODECS under
        ; body/name, replacing any codec already registered under that name.
        register-codec: func [body][
            body: context body
            either select codecs body/name [
                change next find codecs body/name body
            ][append codecs reduce [body/name body]]
        ]
        ; Dispatches to the ENCODE function of the codec named CODEC;
        ; /as forwards TYPE to the codec's own /as refinement.
        encode: func [codec value /as type] [
            either as [codecs/:codec/encode/as value type][codecs/:codec/encode value]
        ]
        ; Dispatches to the DECODE function of the codec named CODEC;
        ; /as forwards TYPE to the codec's own /as refinement.
        decode: func [codec value /as type] [
            either as [codecs/:codec/decode/as value type][codecs/:codec/decode value]
        ]
        ; Returns the name of the first registered codec whose IDENTIFY
        ; function returns a true value for VALUE, or none when no codec matches.
        encoding?: func [value [binary!]] [
            foreach [name codec] codecs [
                ; IDENTIFY is optional. `in` returns none when the codec does not
                ; define it, which must short-circuit before the path is evaluated
                ; (`value? none` is true, so it cannot be used as the guard).
                if all [in codec 'identify codec/identify value] [return name]
            ]
            none
        ]
    ]
]

context [
    ; Prototype format object. encode and decode derive their working format
    ; from it with `make default-format ...`, so these are the values used
    ; when the caller overrides nothing.
    default-format: context [
        field: ","          ; field delimiter
        record: "^M^/"      ; record (line) delimiter written by encode
        quote: {"}          ; character wrapped around quoted values
        escape: {"}         ; prefix written before a quote inside a value
    ]
    register-codec [
        name: 'csv
        type: 'text
        title: "Comma Separated Values (CSV)"
        version: system/script/header/version
        suffixes: [%.csv %.tsv]
        ; Encodes DATA (a block of records) as delimited text and returns the string.
        ; Each record is a block of fields; a non-block record is treated as a
        ; single field. Every record, including an empty one, ends with format/record.
        ; /as FORMAT selects the dialect:
        ;   'tab or 'tsv     - tab-separated, values written unquoted and unescaped
        ;   any other word   - the defaults from default-format (e.g. 'csv)
        ;   block or object  - overrides individual default-format fields
        ;   anything else    - converted to a string and used as the field delimiter
        ; Dates are written as year-month-day (plus the time when present), numbers
        ; with FORM, none as an empty field, everything else quoted.
        encode: func [data [block!] /as format /local tsv result] [
            ; Decide on TSV mode before FORMAT is replaced by the format object.
            tsv: all [as word? format find [tab tsv] format]
            format: either all [as not word? format] [
                either any [block? format object? format] [
                    make default-format format
                ] [
                    make default-format [field: to-string format]
                ]
            ] [
                ; A word is a preset name, never a delimiter: only 'tab / 'tsv
                ; change the field delimiter, any other word keeps the defaults.
                make default-format either tsv [[field: "^-"]] [[]]
            ]

            result: copy ""
            foreach line data [
                line: compose [(line)]
                foreach item line [
                    append result any [
                        all [date? item not none? item/time rejoin [item/year "-" item/month "-" item/day " " item/time]]
                        all [date? item rejoin [item/year "-" item/month "-" item/day]]
                        all [number? item form item]
                        all [none? item ""]
                        all [tsv form item]
                        rejoin [format/quote (replace/all form item format/quote rejoin [format/escape format/quote]) format/quote]
                    ]
                    append result format/field
                ]
                ; Drop the delimiter written after the last field. It is removed in
                ; full (it may be longer than one character) and only when this
                ; record wrote at least one field, so an empty record cannot eat
                ; into the previous record's terminator.
                if not empty? line [
                    clear skip tail result negate length? form format/field
                ]
                append result format/record
            ]
            result
        ]

        ; Decodes delimited text into a block of records (one block of fields per
        ; line). Empty fields are returned as none, blank lines as empty blocks.
        ; DATA may be a string or binary, or a file/url that is read first.
        ; /as FORMAT selects the dialect:
        ;   'tab or 'tsv     - plain tab-separated values, no quote handling
        ;   any other word   - the defaults from default-format (e.g. 'csv)
        ;   block or object  - overrides individual default-format fields
        ;   string or char   - used as the field delimiter
        decode: func [
            data [binary! string! file! url!] 
            /as format [char! string! word! block! object!] 
        /local 
            quote sep escape result line lines item whitespace non-quote-escape non-sep value tsv last-line
        ] [
            if any [file? data url? data] [data: read data]
            if binary? data [data: as-string data]

            ; Decide on TSV mode before FORMAT is replaced by the format object.
            tsv: all [as word? format find [tab tsv] format]
            format: either all [as not word? format] [
                either any [block? format object? format] [
                    make default-format format
                ] [
                    make default-format [field: to-string format]
                ]
            ] [
                ; A word is a preset name, never a delimiter: only 'tab / 'tsv
                ; change the field delimiter, any other word keeps the defaults.
                make default-format either tsv [[field: "^-"]] [[]]
            ]
            sep: format/field 
            escape: format/escape 
            quote: format/quote

            ; changes crlf, lfcr & cr to lf (on a copy, the caller's data is untouched)
            data: copy data
            replace/all data "^M^/" "^/"
            replace/all data "^/^M" "^/"
            replace/all data "^M" "^/"

            ; Blanks skipped around fields. Delimiter characters are excluded so
            ; that a tab (or space) delimiter is never swallowed as padding.
            whitespace: charset exclude " ^-" form sep
            non-quote-escape: complement charset rejoin ["" quote escape]
            non-sep: complement charset rejoin ["" sep newline]

            either tsv [
                result: copy []
                lines: copy []
                parse/all data [
                    any [copy line to "^/" "^/" (append lines line)]
                    copy line to end (if line [append lines line])
                ]
                foreach line lines [
                    append/only result copy []
                    ; COPY yields none for an empty match, so a blank line arrives
                    ; here as none: keep it as an empty record instead of parsing it.
                    if line [
                        parse/all line [
                            any [copy item to "^-" "^-" (append last result item)]
                            copy item to end (append last result item)
                        ]
                    ]
                ]
            ] [
                result: copy []

                ; The LINE rule needs a newline to accept an empty last field.
                ; Terminate a final record that has content but no newline, so a
                ; trailing delimiter there still yields its none field. A final
                ; line that is empty or blank is left alone.
                last-line: any [find/last/tail data newline data]
                if not parse/all last-line [any whitespace] [append data newline]

                ; One field: either a quoted value (escapes resolved) or a run of
                ; characters up to the next delimiter/newline. The text is
                ; accumulated in VALUE. Note that DATA is reused as scratch
                ; variable by the rules once parsing has started.
                item: [
                    any whitespace
                    [quote copy data any non-quote-escape (if data [append value data]) any [
                        escape copy data escape (append value data) |
                        escape copy data quote (append value data) |
                        copy data some non-quote-escape (append value data)
                    ] quote] any whitespace |
                    [any whitespace copy data some non-sep (append value data)]
                ]
                ; One record: opens a new block in RESULT, then adds every
                ; delimiter-terminated field and finally the last field.
                line: [
                    (append/only result copy []) 
                    any [(value: copy "") opt item sep any whitespace (append last result either empty? value [none] [value])]
                    [(value: copy "") [item | to newline] (append last result either empty? value [none] [value])]
                ]
                parse/all data [
                    opt #{EFBBBF} ; UTF-8 BOM
                    any [
                        [any whitespace newline (append/only result copy [])] |
                        [any whitespace line any whitespace newline] 
                    ]
                ]
                ; The last, failing attempt to match LINE at the end of the input
                ; has already opened an empty record: drop it.
                if empty? last result [remove back tail result]
            ]
            result
        ]
    ]
 ]