Diffstat (limited to 'src')
-rw-r--r--  src/header_template.txt        6
-rw-r--r--  src/main.odin                410
-rw-r--r--  src/wav/wav.odin             103
-rw-r--r--  src/wav/xml/debug_print.odin  86
-rw-r--r--  src/wav/xml/doc.odin          23
-rw-r--r--  src/wav/xml/helpers.odin      52
-rw-r--r--  src/wav/xml/tokenizer.odin   415
-rw-r--r--  src/wav/xml/xml_reader.odin  628
8 files changed, 1600 insertions, 123 deletions
diff --git a/src/header_template.txt b/src/header_template.txt
index 8d42a55..cc4477a 100644
--- a/src/header_template.txt
+++ b/src/header_template.txt
@@ -1,14 +1,14 @@
# Header info fields
#
# In this file, you can write things you want to appear in the header of the sound report.
-# These fields will be inserted into sound reports generated directly from wav metadata.
+# These fields will only be inserted into sound reports generated directly from wav metadata.
# This happens when you submit a folder of .WAV files that doesn't contain a CSV file.
#
# Empty lines, and lines starting with # are ignored.
-# No need to add facts about the files, those are added automatically from metadata.
+# Don't add facts about the files; those are added automatically from metadata.
+# This includes project name and tape, which are generally recorded in the files themselves.
Sound Mixer: Ola Nordmann
Phone: +0 123 45 678
Email: ola@nordmann.no
Boom Operator: Sven Svensson
-Project: Project Name
diff --git a/src/main.odin b/src/main.odin
index c0f66ba..7255dd6 100644
--- a/src/main.odin
+++ b/src/main.odin
@@ -6,6 +6,7 @@ import "core:os/os2"
import "core:path/filepath"
import "core:sys/windows"
import "core:strings"
+import "core:math"
import "wav"
/*
@@ -45,6 +46,7 @@ Info_Line :: struct {
}
Report :: struct {
+ // Content
title : string,
info_lines : []Info_Line,
header : []string,
@@ -53,12 +55,15 @@ Report :: struct {
row_count : int,
info_line_count : int,
tc_column_index : int,
+
+ // Meta
+ path : string,
}
CSV :: string
Directory :: [dynamic]string
Job :: union {CSV, Directory}
-job_list :: [dynamic]Job
+job_list : [dynamic]Job
// TODO: Change file_list to job_list, so the Directory jobs can contain a list of all the relevant .wav files before being sent to parse_folder()
main :: proc() {
@@ -92,33 +97,48 @@ main :: proc() {
if(path_info.is_dir) {
fmt.printf("Directory submitted! Walking directory...\n\n")
fmt.printf("šŸ“ {}\n", path_info.name)
- try_os2 := walk_directory(path_info.fullpath, &file_count, &job_list, 1)
+ try_os2 := walk_directory(path_info.fullpath, &file_count, 1)
if len(job_list) == 0 && try_os2 {
fmt.printf("\nNot_Dir error encountered. Trying os2 version...\n\n")
fmt.printf("šŸ“ {}\n", path_info.name)
- walk_directory_os2(path_info.fullpath, &file_count, &job_list, 1)
+ walk_directory_os2(path_info.fullpath, &file_count, 1)
}
} else {
fmt.println("File submitted! Processing file...")
- append(&job_list, strings.clone(path_info.fullpath))
+ append(&job_list, CSV(strings.clone(path_info.fullpath)))
}
- for file, f in job_list {
+ for job, i in job_list {
- file_info, _ := os.stat(file)
- fmt.printf("\nšŸ“„ File {}: {}\n", f+1, file_info.name)
- parsed, ok_parse := parse(file_info.fullpath)
- if !ok_parse {
- fmt.printf("Parse failed: {}\n", file_info.fullpath)
- continue
+ parsed : Report
+ parse_ok : bool
+ switch file in job {
+ case CSV:
+ file_info, _ := os.stat(file)
+ fmt.printf("\nšŸ“„ File {}: {}\n", i+1, file_info.name)
+ parsed, parse_ok = parse_file(file_info.fullpath)
+ if !parse_ok {
+ fmt.printf("Parse failed: {}\n", file_info.fullpath)
+ continue
+ }
+ case Directory:
+ fmt.printf("\nšŸ“ Folder {}: ", i+1)
+ parsed, parse_ok = parse_folder(file)
+ fmt.printf("{}", parsed.title)
+ if parse_ok {
+ fmt.printf("\nParsed %d WAV(s).\n", parsed.row_count)
+ } else {
+ file_info, _ := os.stat(file[0])
+ fmt.printf("\nParse failed: {}\n", file_info.fullpath)
+ continue
+ }
}
- output_name := fmt.aprintf("{}/{}_Knekt_Lydrapport.html", filepath.dir(file_info.fullpath), parsed.title, allocator=context.temp_allocator)
- render(parsed, output_name)
+ render(parsed)
free_all(context.temp_allocator)
files_done += 1
}
- fmt.printf("\nCompleted {}/{} files.\n\n", files_done, len(job_list))
+ fmt.printf("\nCompleted {}/{} job(s).\n\n", files_done, len(job_list))
} else {
fmt.printf("ERROR could not get path info for: {}\n", input_file_name)
}
@@ -126,19 +146,253 @@ main :: proc() {
}
-parse :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
- if os.is_dir(path) {
- return parse_folder(path)
+parse_folder :: proc(paths : Directory) -> (Report, bool) {
+
+ // 888 888 888 8888b. 888 888
+ // 888 888 888 "88b 888 888
+ // 888 888 888 .d888888 Y88 88P
+ // d8b Y88b 888 d88P 888 888 Y8bd8P
+ // Y8P "Y8888888P" "Y888888 Y88P
+
+ output : Report = {}
+
+ wavs : [dynamic]wav.Wav
+
+ max_channels := 0
+ for path, i in paths {
+ w, ok := wav.read(path)
+ if ok {
+ append(&wavs, w)
+ max_channels = max(max_channels, w.channels)
+ }
+ }
+
+ header_build : [dynamic]string
+ append(&header_build, "Circled")
+ append(&header_build, "File Name")
+ append(&header_build, "Scene")
+ append(&header_build, "Take")
+ append(&header_build, "Timecode")
+ append(&header_build, "TC FPS")
+ append(&header_build, "User Bits")
+ append(&header_build, "Tape")
+ append(&header_build, "Date")
+ append(&header_build, "Project")
+ append(&header_build, "Sample Rate")
+ append(&header_build, "Format") // Bit depth and int vs float
+ first_channel_index := len(header_build)
+ last_channel_index := -1
+ for i in 0..<max_channels {
+ track_title := fmt.aprintf("Track %d", i+1)
+ last_channel_index = len(header_build)
+ append(&header_build, track_title)
+ }
+ append(&header_build, "Note")
+ output.header = header_build[:]
+
+ output.column_count = len(header_build)
+ output.row_count = len(wavs)
+
+ output.table = make([][]string, output.row_count, context.temp_allocator)
+ for &row in output.table {
+ row = make([]string, output.column_count, context.temp_allocator)
+ }
+
+ output.info_lines = make([]Info_Line, 64, context.temp_allocator)
+
+ info_txt, info_txt_ok := os.read_entire_file(HEADER_FIELDS_PATH, context.temp_allocator)
+ if info_txt_ok {
+ it := string(info_txt)
+ line_index := 0
+ for line in strings.split_lines_iterator(&it) {
+ if strings.starts_with(line, "#") {
+ continue
+ }
+ if len(line)<2 {
+ continue
+ }
+ colon := strings.index_rune(line, ':')
+ if colon==-1 {
+ continue
+ }
+ CUTSET :: " "
+ output.info_lines[line_index].field = strings.trim(line[:colon+1], CUTSET)
+ output.info_lines[line_index].entry = strings.trim(line[colon+1:], CUTSET)
+ line_index += 1
+ }
+ output.info_lines[line_index].field = " "
+ output.info_lines[line_index].entry = "- - - - -"
+ line_index += 1
+ output.info_line_count = line_index
}
- return parse_file(path, device)
-}
-parse_folder :: proc(path : string) -> (Report, bool) {
- output : Report = {}
- return output, false
+
+ // Populating the table with data
+
+ for w, i in wavs {
+ row := output.table[i]
+ stat, _ := os.stat(w.path, allocator=context.temp_allocator)
+
+ for name, i in w.channel_names {
+ row[first_channel_index + i] = name
+ }
+ for title, i in output.header {
+ switch title {
+ case "File Name":
+ row[i] = stat.name
+ case "Scene":
+ row[i] = w.scene
+ case "Take":
+ row[i] = fmt.tprintf("T%03d", w.take)
+ case "Timecode":
+ row[i] = fmt.tprintf("%02d:%02d:%02d:%02d", // Timecode
+ w.timecode.hour,
+ w.timecode.minute,
+ w.timecode.second,
+ int(math.round(w.timecode.frame)))
+ case "TC FPS":
+ if w.tc_dropframe { // TC FPS
+ row[i] = fmt.tprintf("%.03f DF", w.tc_framerate)
+ } else {
+ row[i] = fmt.tprintf("%.03f ND", w.tc_framerate)
+ }
+ case "User Bits":
+ if w.ubits != {0,0,0,0,0,0,0,0,} {
+ row[i] = fmt.tprintf("%d%d%d%d%d%d%d%d", expand_values(w.ubits))
+ }
+ case "Tape":
+ row[i] = w.tape
+ case "Date":
+ row[i] = fmt.tprintf("%04d-%02d-%02d", expand_values(w.date))
+ case "Project":
+ row[i] = w.project
+ case "Sample Rate":
+ row[i] = fmt.tprintf("%d Hz", w.sample_rate)
+ case "Format":
+ switch w.format { // "Format", aka bit depth + int vs float
+ case .INT:
+ row[i] = fmt.tprintf("%d-bit int", w.bit_depth)
+ case .FLOAT:
+ row[i] = fmt.tprintf("%d-bit float", w.bit_depth)
+ }
+ case "Circled":
+ if w.circled do row[i] = "O"
+ case "Note":
+ row[i] = w.note
+ }
+ }
+ }
+
+
+
+ // Cleanup!
+ when VERBOSE do fmt.printf("Struct before cleanup:\n%#v\n", output)
+
+ // Stacking tracks to the left
+ for &line, l in output.table {
+ stacking_index := first_channel_index
+ for &field, f in line[first_channel_index:last_channel_index+1] {
+ if field != "" {
+ line[stacking_index] = field
+ stacking_index += 1
+ }
+ }
+ for &field, f in line[stacking_index:last_channel_index+1] {
+ field = ""
+ }
+ }
+
+
+ // Cleaning out unused columns
+ touched := make([]bool, output.column_count, context.temp_allocator)
+ // Finding them
+ for line, l in output.table {
+ for field, f in line {
+ if touched[f] do continue
+ if field != "" {
+ touched[f] = true
+ }
+ }
+ }
+
+ // Turning unchanging columns into info lines
+ changed := make([]bool, output.column_count, context.temp_allocator)
+ // Find the timecode column up front, so the pass below never keys off a stale tc_column_index
+ for title, i in output.header {
+ if title == "Timecode" {
+ output.tc_column_index = i
+ break
+ }
+ }
+ prev_line : []string = nil
+ for line, l in output.table {
+ if l>0 {
+ prev_line = output.table[l - 1]
+ for field, f in line {
+ if (prev_line[f] != field) ||
+ (first_channel_index <= f && f <= last_channel_index) ||
+ (f == output.tc_column_index) {
+ changed[f] = true
+ }
+ }
+ }
+ }
+ for did_change, i in changed {
+ if (!did_change) && touched[i] && prev_line != nil { // guard: prev_line is still nil when the table has fewer than two rows
+ field := fmt.aprintf("{}: ", output.header[i], allocator=context.temp_allocator)
+ entry := prev_line[i]
+ output.info_lines[output.info_line_count] = {field=field, entry=entry}
+ output.info_line_count += 1
+ }
+ }
+
+
+ // Removing unused and static columns
+ for &line, l in output.table {
+ stacking_index := 0
+ for &field, f in line {
+ if touched[f] && changed[f] {
+ line[stacking_index] = field
+ stacking_index += 1
+ }
+ }
+ for &field, f in line[stacking_index:] {
+ field = ""
+ }
+ }
+ stacking_index := 0
+ for &field, f in output.header {
+ if touched[f] && changed[f] {
+ output.header[stacking_index] = field
+ stacking_index += 1
+ }
+ }
+ for &field, f in output.header[stacking_index:] {
+ field = ""
+ }
+
+ output.column_count = stacking_index
+
+ // Setting title for report
+ output.title = strings.trim(filepath.base(filepath.dir(paths[0])), "/\\")
+ for item in output.info_lines[:output.info_line_count] {
+ if strings.has_prefix(item.field, "Tape") { // fields keep their trailing colon ("Tape:"), so an exact "Tape" match would never hit
+ output.title = item.entry
+ }
+ }
+
+ // Setting column to sort by
+ for title, i in output.header {
+ if title == "Timecode" {
+ output.tc_column_index = i
+ break
+ }
+ }
+
+ when VERBOSE do fmt.printf("Struct before output:\n%#v\n", output)
+
+ output.path = fmt.tprintf("{}/{}_Knekt_Lydrapport.html", filepath.dir(paths[0]), output.title)
+
+ return output, true
}
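
Note: the "stacking" passes above all share one pattern: walk a row, copy the fields that survive a predicate to the left, then blank the tail. A minimal, self-contained sketch of that compaction (compact_left and the demo values are hypothetical, not part of this patch):

package compact_sketch

import "core:fmt"

// Keep the fields whose column passed the predicate, shift them left, blank the tail.
compact_left :: proc(row: []string, keep: []bool) -> (kept: int) {
	for field, f in row {
		if keep[f] {
			row[kept] = field // kept <= f, so we never clobber an unread field
			kept += 1
		}
	}
	for f in kept..<len(row) {
		row[f] = ""
	}
	return
}

main :: proc() {
	row := []string{"O", "", "Scene 1", "", "Boom"}
	keep := []bool{true, false, true, false, true}
	n := compact_left(row, keep)
	fmt.println(row[:n]) // -> ["O", "Scene 1", "Boom"]
}
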
-parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
+
+
+
+parse_file :: proc(path : CSV, device : Device = .UNSET) -> (Report, bool) {
device := device
output : Report = {}
data, ok := os.read_entire_file(path, context.temp_allocator)
@@ -158,20 +412,20 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
if (device!=.UNSET) { break }
if line == "\"SOUND REPORT\"," {
device = .ZOOM
- if VERBOSE do fmt.printf("Detected ZOOM from quotes and comma on line index {}\n", line_number)
+ when VERBOSE do fmt.printf("Detected ZOOM from quotes and comma on line index {}\n", line_number)
}
if line == "\"ZOOM F8\"," {
device = .ZOOM
- if VERBOSE do fmt.printf("Detected ZOOM from \"ZOOM F8\" on line index {}\n", line_number)
+ when VERBOSE do fmt.printf("Detected ZOOM from \"ZOOM F8\" on line index {}\n", line_number)
}
if line == "SOUND REPORT" {
device = .SD6
- if VERBOSE do fmt.printf("Detected SOUND_DEVICES from unquoted SOUND REPORT line index {}\n", line_number)
+ when VERBOSE do fmt.printf("Detected SOUND_DEVICES from unquoted SOUND REPORT line index {}\n", line_number)
}
if len(line)<15 do continue
if line[:13] == "SOUND REPORT," {
device = .SD8
- if VERBOSE do fmt.printf("Detected SOUND_DEVICES 8-series from SOUND REPORT with missing newline on line index {}\n", line_number)
+ when VERBOSE do fmt.printf("Detected SOUND_DEVICES 8-series from SOUND REPORT with missing newline on line index {}\n", line_number)
}
}
@@ -271,7 +525,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
// STAGE 3 --------------------------------------------------------------
// Filling with data
- if VERBOSE do fmt.printf("Struct before main parse:\n%#v\n", output)
+ when VERBOSE do fmt.printf("Struct before main parse:\n%#v\n", output)
first_channel_index := -1
last_channel_index := -1
@@ -304,7 +558,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
stage = .INFO
line_elements := strings.split(line, ",")
- if VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements)
+ when VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements)
field := fmt.aprintf("{}:", line_elements[1], allocator=context.temp_allocator)
entry := line_elements[2]
output.info_lines[info_line_index].field = field
@@ -318,9 +572,9 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
continue
}
line_elements := strings.split(line, ",")
- if VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements)
+ when VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements)
if line_elements[0] == "Date" {
- if VERBOSE do fmt.printf("Skipping line {}, because it's the retarded date field on an 8-series\n", line_index)
+ when VERBOSE do fmt.printf("Skipping line {}, because it's the retarded date field on an 8-series\n", line_index)
output.info_line_count -= 1
continue
}
@@ -335,10 +589,10 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
if line == "," {
continue // This is here because there are a bunch of lines that are just commas before the header
} else if len(line)>3 {
- if VERBOSE do fmt.printf(".HEADER {}:", line_index)
+ when VERBOSE do fmt.printf(".HEADER {}:", line_index)
// No trailing comma in the header??
for element, e in strings.split(line, ",") {
- if VERBOSE do fmt.printf(" {}", element)
+ when VERBOSE do fmt.printf(" {}", element)
output.header[e] = element
if element[:3] == "Trk" {
@@ -351,22 +605,22 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
}
}
- if VERBOSE do fmt.printf("\n")
- if VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index)
- if VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index)
+ when VERBOSE do fmt.printf("\n")
+ when VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index)
+ when VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index)
stage = .BODY
}
case .BODY:
if len(line) > 2 {
- if VERBOSE do fmt.printf(".BODY {}:", line_index)
+ when VERBOSE do fmt.printf(".BODY {}:", line_index)
for element, e in strings.split(line, ",") {
- if VERBOSE do fmt.printf(" {}", element)
+ when VERBOSE do fmt.printf(" {}", element)
entry : string = element
output.table[body_line_index][e] = entry
}
- if VERBOSE do fmt.printf("\n")
+ when VERBOSE do fmt.printf("\n")
body_line_index += 1
}
@@ -409,7 +663,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
continue
}
line_elements := strings.split(line, ",")
- if VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements)
+ when VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements)
field := line_elements[0]
entry_raw := line_elements[1]
entry := line_elements[1][1:len(entry_raw)-1]
@@ -422,10 +676,10 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
if line == "," {
// This is here because there are a bunch of lines that are just commas before the header
} else if len(line)>3 {
- if VERBOSE do fmt.printf(".HEADER {}:", line_index)
+ when VERBOSE do fmt.printf(".HEADER {}:", line_index)
// No trailing comma in the header??
for element, e in strings.split(line, ",") {
- if VERBOSE do fmt.printf(" {}", element)
+ when VERBOSE do fmt.printf(" {}", element)
output.header[e] = element
if element[:4] == "Trk " {
@@ -438,20 +692,20 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
}
}
- if VERBOSE do fmt.printf("\n")
+ when VERBOSE do fmt.printf("\n")
} else if line == "" {
stage = .BODY
- if VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index)
- if VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index)
+ when VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index)
+ when VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index)
}
case .BODY:
if len(line) > 2 {
- if VERBOSE do fmt.printf(".BODY {}:", line_index)
+ when VERBOSE do fmt.printf(".BODY {}:", line_index)
// to skip empty entry after trailing comma we do a silly slice
for element, e in strings.split(line, ",")[:output.column_count] {
- if VERBOSE do fmt.printf(" {}", element)
+ when VERBOSE do fmt.printf(" {}", element)
entry : string = element
// Stripping quotes if after tracks begin
if e >= first_channel_index && (len(element)>0) {
@@ -459,7 +713,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
}
output.table[body_line_index][e] = entry
}
- if VERBOSE do fmt.printf("\n")
+ when VERBOSE do fmt.printf("\n")
body_line_index += 1
}
@@ -506,7 +760,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
continue
}
line_elements := strings.split(line, ",")
- if VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements)
+ when VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements)
field_raw := line_elements[0]
entry_raw := line_elements[1]
field := line_elements[0][1:len(field_raw)-1]
@@ -516,10 +770,10 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
info_line_index += 1
case .HEADER:
- if VERBOSE do fmt.printf(".HEADER {}:", line_index)
+ when VERBOSE do fmt.printf(".HEADER {}:", line_index)
// to skip empty entry after trailing comma we do a silly slice
for element, e in strings.split(line, ",")[:output.column_count] {
- if VERBOSE do fmt.printf(" {}", element)
+ when VERBOSE do fmt.printf(" {}", element)
output.header[e] = element[1:len(element)-1]
if element[:4] == "\"Tr " {
@@ -531,20 +785,20 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
output.tc_column_index = e
}
}
- if VERBOSE do fmt.printf("\n")
+ when VERBOSE do fmt.printf("\n")
stage = .BODY
- if VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index)
- if VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index)
+ when VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index)
+ when VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index)
case .BODY:
if line == "" do break
- if VERBOSE do fmt.printf(".BODY {}:", line_index)
+ when VERBOSE do fmt.printf(".BODY {}:", line_index)
// to skip empty entry after trailing comma we do a silly slice
for element, e in strings.split(line, ",")[:output.column_count] {
- if VERBOSE do fmt.printf(" {}", element)
+ when VERBOSE do fmt.printf(" {}", element)
output.table[body_line_index][e] = element[1:len(element)-1]
}
- if VERBOSE do fmt.printf("\n")
+ when VERBOSE do fmt.printf("\n")
body_line_index += 1
}
}
@@ -555,7 +809,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
// STAGE 4 --------------------------------------------------------------
// Cleanup!
- if VERBOSE do fmt.printf("Struct before cleanup:\n%#v\n", output)
+ when VERBOSE do fmt.printf("Struct before cleanup:\n%#v\n", output)
// Stacking tracks to the left
for &line, l in output.table {
@@ -635,13 +889,15 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) {
output.column_count = stacking_index
- if VERBOSE do fmt.printf("Struct before output:\n%#v\n", output)
+ when VERBOSE do fmt.printf("Struct before output:\n%#v\n", output)
+
+ output.path = fmt.tprintf("{}/{}_Knekt_Lydrapport.html", filepath.dir(path), output.title)
return output, true
}
-render :: proc(report : Report, path : string) {
+render :: proc(report : Report) {
// Now we output the HTML.
builder := strings.builder_make(context.temp_allocator)
@@ -687,9 +943,9 @@ render :: proc(report : Report, path : string) {
strings.write_string(&builder, PART_END)
output_text := strings.to_string(builder)
- os.write_entire_file(path, transmute([]u8)output_text)
+ os.write_entire_file(report.path, transmute([]u8)output_text)
- fmt.printf("Output: {}\n", path)
+ fmt.printf("Output: {}\n", report.path)
}
indent_by :: proc(i : int) {
@@ -698,7 +954,7 @@ indent_by :: proc(i : int) {
}
}
-walk_directory :: proc(path : string, file_number : ^int, job_list : ^[dynamic]string, depth : int = 0) -> bool {
+walk_directory :: proc(path : string, file_number : ^int, depth : int = 0) -> bool {
handle, ok := os.open(path)
if ok != os.ERROR_NONE {
indent_by(depth)
@@ -715,7 +971,7 @@ walk_directory :: proc(path : string, file_number : ^int, job_list : ^[dynamic]s
return true
}
- wav_count := 0
+ wav_files : [dynamic]string
has_csv := false
for file in files {
@@ -725,7 +981,7 @@ walk_directory :: proc(path : string, file_number : ^int, job_list : ^[dynamic]s
if file.is_dir {
indent_by(depth)
fmt.printf("šŸ“ %s\n", file.name)
- walk_directory(full_path, file_number, job_list, depth+1) // Recurse
+ walk_directory(full_path, file_number, depth+1) // Recurse
} else { // If file is actually a file
@@ -734,29 +990,30 @@ walk_directory :: proc(path : string, file_number : ^int, job_list : ^[dynamic]s
if extension == ".csv" {
indent_by(depth)
fmt.printf("šŸ“„ [#%d] %s\n", file_number^, file.name)
- append(job_list, strings.clone(file.fullpath))
+ append(&job_list, strings.clone(file.fullpath))
file_number^ += 1
has_csv = true
}
if extension == ".wav" {
- wav_count += 1
+ append(&wav_files, strings.clone(full_path))
}
}
}
+ wav_count := len(wav_files)
if wav_count>0 && !has_csv {
- indent_by(depth+1)
+ indent_by(depth)
if wav_count == 1 {
- fmt.printf("šŸ’½ [#%d] 1 WAV file.\n", file_number^)
+ fmt.printf("šŸ’½ [#%d] A WAV file.\n", file_number^)
} else {
fmt.printf("šŸ’½ [#%d] %d WAV files.\n", file_number^, wav_count)
}
- append(job_list, strings.clone(path))
+ append(&job_list, wav_files)
file_number^ += 1
}
return false
}
-walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynamic]string, depth : int = 0) {
+walk_directory_os2 :: proc(path : string, file_number : ^int, depth : int = 0) {
handle, ok := os2.open(path)
if ok != os2.ERROR_NONE {
indent_by(depth)
@@ -772,7 +1029,7 @@ walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynam
return
}
- wav_count := 0
+ wav_files : [dynamic]string
has_csv := false
for file in files {
@@ -782,7 +1039,7 @@ walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynam
if os.is_dir(full_path) {
indent_by(depth)
fmt.printf("šŸ“ %s\n", file.name)
- walk_directory_os2(full_path, file_number, job_list, depth+1) // Recurse
+ walk_directory_os2(full_path, file_number, depth+1) // Recurse
} else { // If file is actually a file
@@ -791,15 +1048,16 @@ walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynam
if extension == ".csv" {
indent_by(depth)
fmt.printf("šŸ“„ [#%d] %s\n", file_number^, file.name)
- append(job_list, strings.clone(file.fullpath))
+ append(&job_list, strings.clone(file.fullpath))
file_number^ += 1
has_csv = true
}
if extension == ".wav" {
- wav_count += 1
+ append(&wav_files, strings.clone(full_path))
}
}
}
+ wav_count := len(wav_files)
if wav_count>0 && !has_csv {
indent_by(depth+1)
if wav_count == 1 {
@@ -807,7 +1065,7 @@ walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynam
} else {
fmt.printf("šŸ’½ [#%d] %d WAV files.\n", file_number^, wav_count)
}
- append(job_list, strings.clone(path))
+ append(&job_list, wav_files)
file_number^ += 1
}
}
\ No newline at end of file
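
Note on the new job model: a CSV job is a single report file path, while a Directory job now carries the WAV paths gathered during the walk instead of the folder path itself. A minimal sketch of how that tagged-union dispatch reads on its own (paths and package name are invented):

package job_sketch

import "core:fmt"

CSV :: string
Directory :: [dynamic]string
Job :: union {CSV, Directory}

main :: proc() {
	jobs : [dynamic]Job
	append(&jobs, CSV("day1/report.csv"))

	wavs : Directory
	append(&wavs, "day2/SCENE01T01.WAV")
	append(&wavs, "day2/SCENE01T02.WAV")
	append(&jobs, wavs)

	for job, i in jobs {
		// The type switch picks the parser, as in the reworked main() loop.
		switch j in job {
		case CSV:
			fmt.printf("Job %d: CSV %v\n", i+1, j)
		case Directory:
			fmt.printf("Job %d: folder with %d WAV(s)\n", i+1, len(j))
		}
	}
}
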
diff --git a/src/wav/wav.odin b/src/wav/wav.odin
index 01fbad1..41667b5 100644
--- a/src/wav/wav.odin
+++ b/src/wav/wav.odin
@@ -5,22 +5,25 @@ import "core:math"
import "core:strings"
import "core:strconv"
import "core:os"
-import "core:encoding/xml"
+import "xml"
Wav :: struct {
// Basic data
path : string,
- handle : os.Handle,
format : Audio_Format,
channels : int,
sample_rate : int,
bit_depth : int,
reported_size : u32,
+ // Internals
+ handle : os.Handle,
+
// Metadata
+ date : Date,
channel_names : []string,
samples_since_midnight: u64,
- timecode : Timecode,
+ timecode : Timecode, // Derived from samples_since_midnight
tc_framerate : f32,
tc_dropframe : bool,
ubits : [8]u8,
@@ -32,7 +35,7 @@ Wav :: struct {
circled : bool,
}
Audio_Format :: enum {
- PCM = 1,
+ INT = 1,
FLOAT = 3,
}
Timecode :: struct {
@@ -41,23 +44,29 @@ Timecode :: struct {
second : u8,
frame : f32,
}
+Date :: struct {
+ year, month, day : int,
+}
+VERBOSE :: false
BUFFER_SIZE :: 1<<15
main :: proc() {
- enok, enok_ok := read_wav("test/ENOKS-BIRHTDAYT02.WAV", context.temp_allocator)
- fmt.printf("\n\nenok = %#v\n\n", enok)
- prins, prins_ok := read_wav("test/KRONPRINS01T01.wav", context.temp_allocator)
- fmt.printf("\n\nprins = %#v\n\n", prins)
- f8, f8_ok := read_wav("test/F8-SL098-T001.WAV", context.temp_allocator)
- fmt.printf("\n\nf8 = %#v\n\n", f8)
+ // Test
+ enok, enok_ok := read("test/WAVs/ENOKS-BIRHTDAYT02.WAV", context.temp_allocator)
+ when VERBOSE do fmt.printf("\n\nenok = %#v\n\n", enok)
+ prins, prins_ok := read("test/WAVs/KRONPRINS01T01.wav", context.temp_allocator)
+ when VERBOSE do fmt.printf("\n\nprins = %#v\n\n", prins)
+ f8, f8_ok := read("test/WAVs/F8-SL098-T001.WAV", context.temp_allocator)
+ when VERBOSE do fmt.printf("\n\nf8 = %#v\n\n", f8)
}
/*
-Reads in the wav file data, including metadata.
+Reads in the wav file metadata without loading the sound data into RAM.
*/
-read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
+read :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) #optional_ok {
file : Wav
+ file.path = path
load_err : os.Error
file.handle, load_err = os.open(path)
@@ -79,21 +88,21 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
head : int = 0
// RIFF header
- fmt.println(string(temp_buf[0:4]))
+ when VERBOSE do fmt.println(string(temp_buf[0:4]))
if string(temp_buf[0:4]) != "RIFF" do return {}, false
head += 4
// Size
file.reported_size = read_little_endian_u32(temp_buf[head:head+4])
- fmt.println("Reported size:", file.reported_size)
+ when VERBOSE do fmt.println("Reported size:", file.reported_size)
head += 4
// Confirming again that this is a wave file
- fmt.println(string(temp_buf[head:head+4]))
+ when VERBOSE do fmt.println(string(temp_buf[head:head+4]))
if string(temp_buf[head:head+4]) != "WAVE" do return {}, false
head += 4
- fmt.println("\nChunks:\n")
+ when VERBOSE do fmt.println("\nChunks:\n")
// Looping through chunks
null_chunks := 0
@@ -103,7 +112,7 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
head += 4
chunk_size := int(read_little_endian_u32(temp_buf[head:head+4]))
head += 4
- fmt.println(chunk_id, chunk_size,"\n-------------------------------------")
+ when VERBOSE do fmt.println(chunk_id, chunk_size,"\n-------------------------------------")
data_reached := false
next_chunk_start := head + chunk_size
@@ -122,13 +131,13 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
null_chunks = 0
case "fmt ":
file.format = Audio_Format(read_little_endian_u16(temp_buf[head:]))
- fmt.println("Format:", file.format)
+ when VERBOSE do fmt.println("Format:", file.format)
head += 2
file.channels = int(read_little_endian_u16(temp_buf[head:]))
- fmt.println("Channels:", file.channels)
+ when VERBOSE do fmt.println("Channels:", file.channels)
head += 2
file.sample_rate = int(read_little_endian_u32(temp_buf[head:]))
- fmt.println("Sample rate:", file.sample_rate)
+ when VERBOSE do fmt.println("Sample rate:", file.sample_rate)
head += 4
// Skipping byte rate and block align.
@@ -137,7 +146,7 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
head += 4 + 2
file.bit_depth = int(read_little_endian_u16(temp_buf[head:]))
- fmt.println("Bit depth:", file.bit_depth)
+ when VERBOSE do fmt.println("Bit depth:", file.bit_depth)
head += 2
head = data_end
null_chunks = 0
@@ -148,9 +157,9 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
null_chunks += 1
}
}
- fmt.println(print_data, "\n")
+ when VERBOSE do fmt.println(print_data, "\n")
} else {
- fmt.println("End of buffer reached.")
+ when VERBOSE do fmt.println("End of buffer reached.")
break
}
@@ -158,11 +167,11 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
head = next_chunk_start
if null_chunks > 3 {
- fmt.println("Got more than 3 null chunks in a row. Quitting parse.")
+ when VERBOSE do fmt.println("Got more than 3 null chunks in a row. Quitting parse.")
break
}
if data_reached {
- fmt.println("Data reached.")
+ when VERBOSE do fmt.println("Data reached.")
}
}
@@ -202,13 +211,13 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
}
}
- fmt.printf("\n")
- tab(indent)
+ when VERBOSE do fmt.printf("\n")
+ when VERBOSE do tab(indent)
element := doc.elements[element_id]
if element.kind == .Element {
- fmt.printf("<%v>", element.ident)
+ when VERBOSE do fmt.printf("<%v>", element.ident)
if len(element.value) > 0 {
value := element.value[0]
@@ -301,18 +310,18 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
for value in element.value {
switch v in value {
case string:
- fmt.printf(": %v", v)
+ when VERBOSE do fmt.printf(": %v", v)
case xml.Element_ID:
xml_recurse(doc, v, file, naming_channel, interleave_set, allocator, indent + 1)
}
}
for attr in element.attribs {
- tab(indent + 1)
- fmt.printf("[Attr] %v: %v\n", attr.key, attr.val)
+ when VERBOSE do tab(indent + 1)
+ when VERBOSE do fmt.printf("[Attr] %v: %v\n", attr.key, attr.val)
}
} else if element.kind == .Comment {
- fmt.printf("[COMMENT] %v\n", element.value)
+ when VERBOSE do fmt.printf("[COMMENT] %v\n", element.value)
}
return
@@ -382,15 +391,21 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
}
}
head := 0
- fmt.printf("Description: \n%v\n", string(temp_bext[head:256]))
+ when VERBOSE do fmt.printf("Description: \n%v\n", string(temp_bext[head:256]))
head += 256
- fmt.printf("Originator: %v\n", string(temp_bext[head:head+32]))
+ when VERBOSE do fmt.printf("Originator: %v\n", string(temp_bext[head:head+32]))
head += 32
- fmt.printf("Originator Reference: %v\n", string(temp_bext[head:head+32]))
+ when VERBOSE do fmt.printf("Originator Reference: %v\n", string(temp_bext[head:head+32]))
head += 32
- fmt.printf("Origination Date: %v\n", string(temp_bext[head:head+10]))
+ date := string(temp_bext[head:head+10])
+ when VERBOSE do fmt.printf("Origination Date: %v\n", date)
+ date_splits := strings.split(date, "-")
+ file.date.year, _ = strconv.parse_int(date_splits[0])
+ file.date.month, _ = strconv.parse_int(date_splits[1])
+ file.date.day, _ = strconv.parse_int(date_splits[2])
+ delete(date_splits)
head += 10
- fmt.printf("Origination Time: %v\n", string(temp_bext[head:head+8]))
+ when VERBOSE do fmt.printf("Origination Time: %v\n", string(temp_bext[head:head+8]))
head += 8
file.samples_since_midnight = read_little_endian_u64(temp_bext[head:head+8])
@@ -400,19 +415,19 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) {
file.timecode.minute = u8((seconds_since_midnight % 3600) / 60)
file.timecode.second = u8( seconds_since_midnight % 60)
file.timecode.frame = f32( f64(file.samples_since_midnight % u64(file.sample_rate) ) * f64(file.tc_framerate) / f64(file.sample_rate))
- fmt.printf("Time Reference: %v (Samples since midnight, source of timecode)\n", file.samples_since_midnight)
- fmt.printf(" %v seconds + %v samples\n", seconds_since_midnight, file.samples_since_midnight % u64(file.sample_rate))
+ when VERBOSE do fmt.printf("Time Reference: %v (Samples since midnight, source of timecode)\n", file.samples_since_midnight)
+ when VERBOSE do fmt.printf(" %v seconds + %v samples\n", seconds_since_midnight, file.samples_since_midnight % u64(file.sample_rate))
head += 8
- fmt.printf("Version: %v\n", read_little_endian_u16(temp_bext[head:head+2]))
+ when VERBOSE do fmt.printf("Version: %v\n", read_little_endian_u16(temp_bext[head:head+2]))
head += 2
- fmt.printf("UMID Skipped.\n")
+ when VERBOSE do fmt.printf("UMID Skipped.\n")
head += 64
- fmt.printf("Skipped reserved nothingness.\n")
+ when VERBOSE do fmt.printf("Skipped reserved nothingness.\n")
head += 190
- fmt.printf("Coding history:\n%v\n", string(temp_bext[head:]))
+ when VERBOSE do fmt.printf("Coding history:\n%v\n", string(temp_bext[head:]))
}
- fmt.println()
+ when VERBOSE do fmt.println()
// just here to make some printing prettier
temp_bext = nil
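
Note: the bext block above derives HH:MM:SS:FF timecode from samples_since_midnight, the sample rate, and the TC frame rate read from iXML. The same arithmetic in isolation (all values are made-up test numbers):

package tc_sketch

import "core:fmt"
import "core:math"

main :: proc() {
	sample_rate            : u64 = 48000
	tc_framerate           : f64 = 25.0
	// 10:23:45 plus 1920 samples into the next second.
	samples_since_midnight : u64 = 48000 * (10*3600 + 23*60 + 45) + 1920

	secs   := samples_since_midnight / sample_rate
	hour   := secs / 3600
	minute := (secs % 3600) / 60
	second := secs % 60
	// Leftover samples inside the current second become the frame count.
	frame  := f64(samples_since_midnight % sample_rate) * tc_framerate / f64(sample_rate)

	fmt.printf("%02d:%02d:%02d:%02d\n", hour, minute, second, int(math.round(frame)))
	// -> 10:23:45:01  (1920 samples = 0.04 s = exactly 1 frame at 25 fps)
}
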
diff --git a/src/wav/xml/debug_print.odin b/src/wav/xml/debug_print.odin
new file mode 100644
index 0000000..9c47e79
--- /dev/null
+++ b/src/wav/xml/debug_print.odin
@@ -0,0 +1,86 @@
+package encoding_xml
+
+/*
+ An XML 1.0 / 1.1 parser
+
+ Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's license.
+
+ A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+*/
+
+
+import "core:io"
+import "core:fmt"
+
+/*
+ Just for debug purposes.
+*/
+print :: proc(writer: io.Writer, doc: ^Document) -> (written: int, err: io.Error) {
+ if doc == nil { return }
+ written += fmt.wprintf(writer, "[XML Prolog]\n")
+
+ for attr in doc.prologue {
+ written += fmt.wprintf(writer, "\t%v: %v\n", attr.key, attr.val)
+ }
+
+ written += fmt.wprintf(writer, "[Encoding] %v\n", doc.encoding)
+
+ if len(doc.doctype.ident) > 0 {
+ written += fmt.wprintf(writer, "[DOCTYPE] %v\n", doc.doctype.ident)
+
+ if len(doc.doctype.rest) > 0 {
+ fmt.wprintf(writer, "\t%v\n", doc.doctype.rest)
+ }
+ }
+
+ for comment in doc.comments {
+ written += fmt.wprintf(writer, "[Pre-root comment] %v\n", comment)
+ }
+
+ if len(doc.elements) > 0 {
+ fmt.wprintln(writer, " --- ")
+ print_element(writer, doc, 0)
+ fmt.wprintln(writer, " --- ")
+ }
+
+ return written, .None
+}
+
+print_element :: proc(writer: io.Writer, doc: ^Document, element_id: Element_ID, indent := 0) -> (written: int, err: io.Error) {
+ tab :: proc(writer: io.Writer, indent: int) {
+ for _ in 0..=indent {
+ fmt.wprintf(writer, "\t")
+ }
+ }
+
+ tab(writer, indent)
+
+ element := doc.elements[element_id]
+
+ if element.kind == .Element {
+ fmt.wprintf(writer, "<%v>\n", element.ident)
+
+ for value in element.value {
+ switch v in value {
+ case string:
+ tab(writer, indent + 1)
+ fmt.wprintf(writer, "[Value] %v\n", v)
+ case Element_ID:
+ print_element(writer, doc, v, indent + 1)
+ }
+ }
+
+ for attr in element.attribs {
+ tab(writer, indent + 1)
+ fmt.wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val)
+ }
+ } else if element.kind == .Comment {
+ fmt.wprintf(writer, "[COMMENT] %v\n", element.value)
+ }
+
+ return written, .None
+}
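
Note: print/print_element take any io.Writer, so the tree dump can go into a string builder as easily as stdout. A small driver sketch (the import path and sample document are illustrative, and it assumes the demo file sits next to the wav package in src/):

package xml_debug_demo

import "core:fmt"
import "core:strings"
import "wav/xml"

main :: proc() {
	doc, err := xml.parse_string(`<BWFXML><SPEED><MASTER_SPEED>25/1</MASTER_SPEED></SPEED></BWFXML>`)
	if err != .None { return }
	defer xml.destroy(doc)

	b := strings.builder_make()
	defer strings.builder_destroy(&b)
	xml.print(strings.to_writer(&b), doc) // dump the element tree into the builder
	fmt.println(strings.to_string(b))
}
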
diff --git a/src/wav/xml/doc.odin b/src/wav/xml/doc.odin
new file mode 100644
index 0000000..9030cd4
--- /dev/null
+++ b/src/wav/xml/doc.odin
@@ -0,0 +1,23 @@
+/*
+A parser for a useful subset of the `XML` specification.
+
+A from-scratch `XML` implementation, loosely modelled on the [[ spec; https://www.w3.org/TR/2006/REC-xml11-20060816 ]].
+
+Features:
+- Supports enough of the XML 1.0/1.1 spec to handle 99.9% of the XML documents in common current usage.
+- Simple to understand and use. Small.
+
+Caveats:
+- We do NOT support HTML in this package, as that may or may not be valid XML.
+ If it works, great. If it doesn't, that's not considered a bug.
+
+- We do NOT support `UTF-16`. If you have a `UTF-16` XML file, please convert it to `UTF-8` first. Also, our condolences.
+- `<!ELEMENT` and `<!ATTLIST` are not supported, and will be either ignored or return an error depending on the parser options.
+
+MAYBE:
+- XML writer?
+- Serialize/deserialize Odin types?
+
+For a full example, see: [[ core/encoding/xml/example; https://github.com/odin-lang/Odin/tree/master/core/encoding/xml/example ]]
+*/
+package encoding_xml
diff --git a/src/wav/xml/helpers.odin b/src/wav/xml/helpers.odin
new file mode 100644
index 0000000..79f2d72
--- /dev/null
+++ b/src/wav/xml/helpers.odin
@@ -0,0 +1,52 @@
+package encoding_xml
+
+/*
+ An XML 1.0 / 1.1 parser
+
+ Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's license.
+
+ This file contains helper functions.
+*/
+
+
+// Find parent's nth child with a given ident.
+find_child_by_ident :: proc(doc: ^Document, parent_id: Element_ID, ident: string, nth := 0) -> (res: Element_ID, found: bool) {
+ tag := doc.elements[parent_id]
+
+ count := 0
+ for v in tag.value {
+ switch child_id in v {
+ case string: continue
+ case Element_ID:
+ child := doc.elements[child_id]
+ /*
+ Skip comments. They have no name.
+ */
+ if child.kind != .Element { continue }
+
+ /*
+ If the ident matches and it's the nth such child, return it.
+ */
+ if child.ident == ident {
+ if count == nth { return child_id, true }
+ count += 1
+ }
+ }
+
+ }
+ return 0, false
+}
+
+// Find an attribute by key.
+find_attribute_val_by_key :: proc(doc: ^Document, parent_id: Element_ID, key: string) -> (val: string, found: bool) {
+ tag := doc.elements[parent_id]
+
+ for attr in tag.attribs {
+ /*
+ If the ident matches, we're done. There can only ever be one attribute with the same name.
+ */
+ if attr.key == key { return attr.val, true }
+ }
+ return "", false
+}
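
Note: these two helpers are the main query surface the wav package uses against iXML. A hypothetical lookup in that style (element names mimic iXML's TRACK_LIST, but the document is invented):

package xml_helpers_demo

import "core:fmt"
import "wav/xml"

main :: proc() {
	doc, err := xml.parse_string(`<TRACK_LIST><TRACK><NAME>Boom</NAME></TRACK><TRACK><NAME>Lav 1</NAME></TRACK></TRACK_LIST>`)
	if err != .None { return }
	defer xml.destroy(doc)

	// The root element is ID 0; nth is 0-based, so this grabs the second <TRACK>.
	track, track_ok := xml.find_child_by_ident(doc, 0, "TRACK", 1)
	if !track_ok { return }

	if name_el, name_ok := xml.find_child_by_ident(doc, track, "NAME"); name_ok {
		if v, is_text := doc.elements[name_el].value[0].(string); is_text {
			fmt.println("Track 2:", v) // -> Track 2: Lav 1
		}
	}
}
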
diff --git a/src/wav/xml/tokenizer.odin b/src/wav/xml/tokenizer.odin
new file mode 100644
index 0000000..f4c9c8a
--- /dev/null
+++ b/src/wav/xml/tokenizer.odin
@@ -0,0 +1,415 @@
+package encoding_xml
+
+/*
+ An XML 1.0 / 1.1 parser
+
+ Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's license.
+
+ A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816).
+
+ List of contributors:
+ Jeroen van Rijn: Initial implementation.
+*/
+
+
+import "core:fmt"
+import "core:unicode"
+import "core:unicode/utf8"
+import "core:strings"
+
+Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any)
+
+Token :: struct {
+ kind: Token_Kind,
+ text: string,
+ pos: Pos,
+}
+
+Pos :: struct {
+ file: string,
+ offset: int, // starting at 0
+ line: int, // starting at 1
+ column: int, // starting at 1
+}
+
+Token_Kind :: enum {
+ Invalid,
+
+ Ident,
+ Literal,
+ Rune,
+ String,
+
+ Double_Quote, // "
+ Single_Quote, // '
+ Colon, // :
+
+ Eq, // =
+ Lt, // <
+ Gt, // >
+ Exclaim, // !
+ Question, // ?
+ Hash, // #
+ Slash, // /
+ Dash, // -
+
+ Open_Bracket, // [
+ Close_Bracket, // ]
+
+ EOF,
+}
+
+CDATA_START :: "<![CDATA["
+CDATA_END :: "]]>"
+
+COMMENT_START :: "<!--"
+COMMENT_END :: "-->"
+
+Tokenizer :: struct {
+ // Immutable data
+ path: string,
+ src: string,
+ err: Error_Handler,
+
+ // Tokenizing state
+ ch: rune,
+ offset: int,
+ read_offset: int,
+ line_offset: int,
+ line_count: int,
+
+ // Mutable data
+ error_count: int,
+}
+
+init :: proc(t: ^Tokenizer, src: string, path: string, err: Error_Handler = default_error_handler) {
+ t.src = src
+ t.err = err
+ t.ch = ' '
+ t.offset = 0
+ t.read_offset = 0
+ t.line_offset = 0
+ t.line_count = len(src) > 0 ? 1 : 0
+ t.error_count = 0
+ t.path = path
+
+ advance_rune(t)
+ if t.ch == utf8.RUNE_BOM {
+ advance_rune(t)
+ }
+}
+
+@(private)
+offset_to_pos :: proc(t: ^Tokenizer, offset: int) -> Pos {
+ line := t.line_count
+ column := offset - t.line_offset + 1
+
+ return Pos {
+ file = t.path,
+ offset = offset,
+ line = line,
+ column = column,
+ }
+}
+
+default_error_handler :: proc(pos: Pos, msg: string, args: ..any) {
+ fmt.eprintf("%s(%d:%d) ", pos.file, pos.line, pos.column)
+ fmt.eprintf(msg, ..args)
+ fmt.eprintf("\n")
+}
+
+error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) {
+ pos := offset_to_pos(t, offset)
+ if t.err != nil {
+ t.err(pos=pos, fmt=msg, args=args)
+ }
+ t.error_count += 1
+}
+
+@(optimization_mode="favor_size")
+advance_rune :: proc(t: ^Tokenizer) {
+ #no_bounds_check {
+ /*
+ Already bounds-checked here.
+ */
+ if t.read_offset < len(t.src) {
+ t.offset = t.read_offset
+ if t.ch == '\n' {
+ t.line_offset = t.offset
+ t.line_count += 1
+ }
+ r, w := rune(t.src[t.read_offset]), 1
+ switch {
+ case r == 0:
+ //error(t, t.offset, "illegal character NUL")
+ case r >= utf8.RUNE_SELF:
+ r, w = #force_inline utf8.decode_rune_in_string(t.src[t.read_offset:])
+ if r == utf8.RUNE_ERROR && w == 1 {
+ //error(t, t.offset, "illegal UTF-8 encoding")
+ } else if r == utf8.RUNE_BOM && t.offset > 0 {
+ //error(t, t.offset, "illegal byte order mark")
+ }
+ }
+ t.read_offset += w
+ t.ch = r
+ } else {
+ t.offset = len(t.src)
+ if t.ch == '\n' {
+ t.line_offset = t.offset
+ t.line_count += 1
+ }
+ t.ch = -1
+ }
+ }
+}
+
+peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte {
+ if t.read_offset+offset < len(t.src) {
+ #no_bounds_check return t.src[t.read_offset+offset]
+ }
+ return 0
+}
+
+@(optimization_mode="favor_size")
+skip_whitespace :: proc(t: ^Tokenizer) {
+ for {
+ switch t.ch {
+ case ' ', '\t', '\r', '\n':
+ advance_rune(t)
+ case:
+ return
+ }
+ }
+}
+
+@(optimization_mode="favor_size")
+is_letter :: proc(r: rune) -> bool {
+ if r < utf8.RUNE_SELF {
+ switch r {
+ case '_':
+ return true
+ case 'A'..='Z', 'a'..='z':
+ return true
+ }
+ }
+ return unicode.is_letter(r)
+}
+
+is_valid_identifier_rune :: proc(r: rune) -> bool {
+ if r < utf8.RUNE_SELF {
+ switch r {
+ case '_', '-', ':': return true
+ case 'A'..='Z', 'a'..='z': return true
+ case '0'..='9': return true
+ case -1: return false
+ }
+ }
+
+ if unicode.is_letter(r) || unicode.is_digit(r) {
+ return true
+ }
+ return false
+}
+
+scan_identifier :: proc(t: ^Tokenizer) -> string {
+ offset := t.offset
+ namespaced := false
+
+ for is_valid_identifier_rune(t.ch) {
+ advance_rune(t)
+ if t.ch == ':' {
+ // A namespaced attr can have at most two parts, `namespace:ident`.
+ if namespaced {
+ break
+ }
+ namespaced = true
+ }
+ }
+ return string(t.src[offset : t.offset])
+}
+
+/*
+ A comment ends when we see -->, preceded by a character that's not a dash.
+ "For compatibility, the string "--" (double-hyphen) must not occur within comments."
+
+ See: https://www.w3.org/TR/2006/REC-xml11-20060816/#dt-comment
+
+ Thanks to the length (4) of the comment start, we also have enough lookback,
+ and the peek at the next byte asserts that there's at least one more character
+ that's a `>`.
+*/
+scan_comment :: proc(t: ^Tokenizer) -> (comment: string, err: Error) {
+ offset := t.offset
+
+ for {
+ advance_rune(t)
+ ch := t.ch
+
+ if ch < 0 {
+ //error(t, offset, "[parse] Comment was not terminated\n")
+ return "", .Unclosed_Comment
+ }
+
+ if string(t.src[t.offset - 1:][:2]) == "--" {
+ if peek_byte(t) == '>' {
+ break
+ } else {
+ //error(t, t.offset - 1, "Invalid -- sequence in comment.\n")
+ return "", .Invalid_Sequence_In_Comment
+ }
+ }
+ }
+
+ expect(t, .Dash)
+ expect(t, .Gt)
+
+ return string(t.src[offset : t.offset - 1]), .None
+}
+
+// Skip CDATA
+skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) {
+ if s := string(t.src[t.offset:]); !strings.has_prefix(s, CDATA_START) {
+ return .None
+ }
+
+ t.read_offset += len(CDATA_START)
+ offset := t.offset
+
+ cdata_scan: for {
+ advance_rune(t)
+ if t.ch < 0 {
+ //error(t, offset, "[scan_string] CDATA was not terminated\n")
+ return .Premature_EOF
+ }
+
+ // Scan until the end of a CDATA tag.
+ if s := string(t.src[t.read_offset:]); strings.has_prefix(s, CDATA_END) {
+ t.read_offset += len(CDATA_END)
+ break cdata_scan
+ }
+ }
+ return .None
+}
+
+@(optimization_mode="favor_size")
+scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false, multiline := true) -> (value: string, err: Error) {
+ err = .None
+
+ loop: for {
+ ch := t.ch
+
+ switch ch {
+ case -1:
+ //error(t, t.offset, "[scan_string] Premature end of file.\n")
+ return "", .Premature_EOF
+
+ case '<':
+ if peek_byte(t) == '!' {
+ if peek_byte(t, 1) == '[' {
+ // Might be the start of a CDATA tag.
+ skip_cdata(t) or_return
+ } else if peek_byte(t, 1) == '-' && peek_byte(t, 2) == '-' {
+ // Comment start. Eat comment.
+ t.read_offset += 3
+ _ = scan_comment(t) or_return
+ }
+ }
+
+ case '\n':
+ if !multiline {
+ //error(t, offset, string(t.src[offset : t.offset]))
+ //error(t, offset, "[scan_string] Not terminated\n")
+ err = .Invalid_Tag_Value
+ break loop
+ }
+ }
+
+ if t.ch == close {
+ // If it's not a CDATA or comment, it's the end of this body.
+ break loop
+ }
+ advance_rune(t)
+ }
+
+ // Strip trailing whitespace.
+ lit := string(t.src[offset : t.offset])
+
+ end := len(lit)
+ eat: for ; end > 0; end -= 1 {
+ ch := lit[end - 1]
+ switch ch {
+ case ' ', '\t', '\r', '\n':
+ case:
+ break eat
+ }
+ }
+ lit = lit[:end]
+
+ if consume_close {
+ advance_rune(t)
+ }
+ return lit, err
+}
+
+peek :: proc(t: ^Tokenizer) -> (token: Token) {
+ old := t^
+ token = scan(t)
+ t^ = old
+ return token
+}
+
+scan :: proc(t: ^Tokenizer, multiline_string := false) -> Token {
+ skip_whitespace(t)
+
+ offset := t.offset
+
+ kind: Token_Kind
+ err: Error
+ lit: string
+ pos := offset_to_pos(t, offset)
+
+ switch ch := t.ch; true {
+ case is_letter(ch):
+ lit = scan_identifier(t)
+ kind = .Ident
+
+ case:
+ advance_rune(t)
+ switch ch {
+ case -1:
+ kind = .EOF
+
+ case '<': kind = .Lt
+ case '>': kind = .Gt
+ case '!': kind = .Exclaim
+ case '?': kind = .Question
+ case '=': kind = .Eq
+ case '#': kind = .Hash
+ case '/': kind = .Slash
+ case '-': kind = .Dash
+ case ':': kind = .Colon
+ case '[': kind = .Open_Bracket
+ case ']': kind = .Close_Bracket
+
+ case '"', '\'':
+ kind = .Invalid
+
+ lit, err = scan_string(t, t.offset, ch, true, multiline_string)
+ if err == .None {
+ kind = .String
+ }
+
+ case '\n':
+ lit = "\n"
+
+ case:
+ kind = .Invalid
+ }
+ }
+
+ if kind != .String && lit == "" {
+ lit = string(t.src[offset : t.offset])
+ }
+ return Token{kind, lit, pos}
+}
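
Note: the tokenizer is usable on its own, which helps when debugging why a given WAV's iXML won't parse. A minimal scan loop over a single tag (the input string is invented):

package xml_tok_demo

import "core:fmt"
import "wav/xml"

main :: proc() {
	t : xml.Tokenizer
	xml.init(&t, `<take circled="true"/>`, "demo.xml")

	for {
		tok := xml.scan(&t)
		if tok.kind == .EOF { break }
		fmt.printf("%v %q\n", tok.kind, tok.text)
	}
	// -> Lt "<", Ident "take", Ident "circled", Eq "=", String "true", Slash "/", Gt ">"
}
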
diff --git a/src/wav/xml/xml_reader.odin b/src/wav/xml/xml_reader.odin
new file mode 100644
index 0000000..c19cbf6
--- /dev/null
+++ b/src/wav/xml/xml_reader.odin
@@ -0,0 +1,628 @@
+package encoding_xml
+/*
+ An XML 1.0 / 1.1 parser
+
+ Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>.
+ Made available under Odin's license.
+
+ List of contributors:
+ - Jeroen van Rijn: Initial implementation.
+*/
+
+import "core:bytes"
+import "core:encoding/entity"
+import "base:intrinsics"
+import "core:mem"
+import "core:os"
+import "core:strings"
+import "base:runtime"
+
+likely :: intrinsics.expect
+
+DEFAULT_OPTIONS :: Options{
+ flags = {.Ignore_Unsupported},
+ expected_doctype = "",
+}
+
+Option_Flag :: enum {
+ // If the caller says that input may be modified, we can perform in-situ parsing.
+ // If this flag isn't provided, the XML parser first duplicates the input so that it can.
+ Input_May_Be_Modified,
+
+ // Document MUST start with `<?xml` prologue.
+ Must_Have_Prolog,
+
+ // Document MUST have a `<!DOCTYPE`.
+ Must_Have_DocType,
+
+ // By default we skip comments. Use this option to intern a comment on a parented Element.
+ Intern_Comments,
+
+ // How to handle unsupported parts of the specification, like <! other than <!DOCTYPE and <![CDATA[
+ Error_on_Unsupported,
+ Ignore_Unsupported,
+
+ // By default CDATA tags are passed-through as-is.
+ // This option unwraps them when encountered.
+ Unbox_CDATA,
+
+ // By default SGML entities like `&gt;`, `&#32;` and `&#x20;` are passed-through as-is.
+ // This option decodes them when encountered.
+ Decode_SGML_Entities,
+
+ // If a tag body has a comment, it will be stripped unless this option is given.
+ Keep_Tag_Body_Comments,
+}
+Option_Flags :: bit_set[Option_Flag; u16]
+
+Document :: struct {
+ elements: [dynamic]Element `fmt:"v,element_count"`,
+ element_count: Element_ID,
+
+ prologue: Attributes,
+ encoding: Encoding,
+
+ doctype: struct {
+ // We only scan the <!DOCTYPE IDENT part and skip the rest.
+ ident: string,
+ rest: string,
+ },
+
+ // If we encounter comments before the root node, and the option to intern comments is given, this is where they'll live.
+ // Otherwise they'll be in the element tree.
+ comments: [dynamic]string `fmt:"-"`,
+
+ // Internal
+ tokenizer: ^Tokenizer `fmt:"-"`,
+ allocator: mem.Allocator `fmt:"-"`,
+
+ // Input. Either the original buffer, or a copy if `.Input_May_Be_Modified` isn't specified.
+ input: []u8 `fmt:"-"`,
+ strings_to_free: [dynamic]string `fmt:"-"`,
+}
+
+Element :: struct {
+ ident: string,
+ value: [dynamic]Value,
+ attribs: Attributes,
+
+ kind: enum {
+ Element = 0,
+ Comment,
+ },
+ parent: Element_ID,
+}
+
+Value :: union {
+ string,
+ Element_ID,
+}
+
+Attribute :: struct {
+ key: string,
+ val: string,
+}
+
+Attributes :: [dynamic]Attribute
+
+Options :: struct {
+ flags: Option_Flags,
+ expected_doctype: string,
+}
+
+Encoding :: enum {
+ Unknown,
+
+ UTF_8,
+ ISO_8859_1,
+
+ // Aliases
+ LATIN_1 = ISO_8859_1,
+}
+
+Error :: enum {
+ // General return values.
+ None = 0,
+ General_Error,
+ Unexpected_Token,
+ Invalid_Token,
+
+ // Couldn't find, open or read file.
+ File_Error,
+
+ // File too short.
+ Premature_EOF,
+
+ // XML-specific errors.
+ No_Prolog,
+ Invalid_Prolog,
+ Too_Many_Prologs,
+
+ No_DocType,
+ Too_Many_DocTypes,
+ DocType_Must_Preceed_Elements,
+
+ // If a DOCTYPE is present _or_ the caller
+ // asked for a specific DOCTYPE and the DOCTYPE
+ // and root tag don't match, we return `.Invalid_DocType`.
+ Invalid_DocType,
+
+ Invalid_Tag_Value,
+ Mismatched_Closing_Tag,
+
+ Unclosed_Comment,
+ Comment_Before_Root_Element,
+ Invalid_Sequence_In_Comment,
+
+ Unsupported_Version,
+ Unsupported_Encoding,
+
+ // <!FOO are usually skipped.
+ Unhandled_Bang,
+
+ Duplicate_Attribute,
+ Conflicting_Options,
+}
+
+parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
+ data := data
+ context.allocator = allocator
+
+ opts := validate_options(options) or_return
+
+ // If `.Input_May_Be_Modified` is not specified, we duplicate the input so that we can modify it in-place.
+ if .Input_May_Be_Modified not_in opts.flags {
+ data = bytes.clone(data)
+ }
+
+ t := new(Tokenizer)
+ init(t, string(data), path, error_handler)
+
+ doc = new(Document)
+ doc.allocator = allocator
+ doc.tokenizer = t
+ doc.input = data
+
+ doc.elements = make([dynamic]Element, 1024, 1024, allocator)
+
+ err = .Unexpected_Token
+ element, parent: Element_ID
+ open: Token
+
+ // If a DOCTYPE is present, the root tag has to match.
+ // If an expected DOCTYPE is given in options (i.e. it's non-empty), the DOCTYPE (if present) and root tag have to match.
+ expected_doctype := options.expected_doctype
+
+ loop: for {
+ skip_whitespace(t)
+ switch t.ch {
+ case '<':
+ // Consume peeked `<`
+ advance_rune(t)
+
+ open = scan(t)
+ // NOTE(Jeroen): We're not using a switch because this if-else chain ordered by likelihood is 2.5% faster at -o:size and -o:speed.
+ if likely(open.kind, Token_Kind.Ident) == .Ident {
+ // e.g. <odin - Start of new element.
+ element = new_element(doc)
+ if element == 0 { // First Element
+ parent = element
+ } else {
+ append(&doc.elements[parent].value, element)
+ }
+
+ doc.elements[element].parent = parent
+ doc.elements[element].ident = open.text
+
+ parse_attributes(doc, &doc.elements[element].attribs) or_return
+
+ // If a DOCTYPE is present _or_ the caller
+ // asked for a specific DOCTYPE and the DOCTYPE
+ // and root tag don't match, we return .Invalid_DocType.
+ if element == 0 { // Root tag?
+ if len(expected_doctype) > 0 && expected_doctype != open.text {
+ //error(t, t.offset, "Root Tag doesn't match DOCTYPE. Expected: %v, got: %v\n", expected_doctype, open.text)
+ return doc, .Invalid_DocType
+ }
+ }
+
+ // One of these should follow:
+ // - `>`, which means we've just opened this tag and expect a later element to close it.
+ // - `/>`, which means this is an 'empty' or self-closing tag.
+ end_token := scan(t)
+ #partial switch end_token.kind {
+ case .Gt:
+ // We're now the new parent.
+ parent = element
+
+ case .Slash:
+ // Empty tag. Close it.
+ expect(t, .Gt) or_return
+ parent = doc.elements[element].parent
+ element = parent
+
+ case:
+ //error(t, t.offset, "Expected close tag, got: %#v\n", end_token)
+ return
+ }
+
+ } else if open.kind == .Slash {
+ // Close tag.
+ ident := expect(t, .Ident) or_return
+ _ = expect(t, .Gt) or_return
+
+ if doc.elements[element].ident != ident.text {
+ //error(t, t.offset, "Mismatched Closing Tag. Expected %v, got %v\n", doc.elements[element].ident, ident.text)
+ return doc, .Mismatched_Closing_Tag
+ }
+ parent = doc.elements[element].parent
+ element = parent
+
+ } else if open.kind == .Exclaim {
+ // <!
+ next := scan(t)
+ #partial switch next.kind {
+ case .Ident:
+ switch next.text {
+ case "DOCTYPE":
+ if len(doc.doctype.ident) > 0 {
+ return doc, .Too_Many_DocTypes
+ }
+ if doc.element_count > 0 {
+ return doc, .DocType_Must_Preceed_Elements
+ }
+ parse_doctype(doc) or_return
+
+ if len(expected_doctype) > 0 && expected_doctype != doc.doctype.ident {
+ //error(t, t.offset, "Invalid DOCTYPE. Expected: %v, got: %v\n", expected_doctype, doc.doctype.ident)
+ return doc, .Invalid_DocType
+ }
+ expected_doctype = doc.doctype.ident
+
+ case:
+ if .Error_on_Unsupported in opts.flags {
+ //error(t, t.offset, "Unhandled: <!%v\n", next.text)
+ return doc, .Unhandled_Bang
+ }
+ skip_element(t) or_return
+ }
+
+ case .Dash:
+ // Comment: <!-- -->.
+ // The grammar does not allow a comment to end in --->
+ expect(t, .Dash)
+ comment := scan_comment(t) or_return
+
+ if .Intern_Comments in opts.flags {
+ if len(doc.elements) == 0 {
+ append(&doc.comments, comment)
+ } else {
+ el := new_element(doc)
+ doc.elements[el].parent = element
+ doc.elements[el].kind = .Comment
+ append(&doc.elements[el].value, comment)
+ append(&doc.elements[element].value, el)
+ }
+ }
+
+ case .Open_Bracket:
+ // This could be a CDATA tag part of a tag's body. Unread the `<![`
+ t.offset -= 3
+
+ // Instead of calling `parse_body` here, we could also `continue loop`
+ // and fall through to the `case:` at the bottom of the outer loop.
+ // This makes the intent clearer.
+ parse_body(doc, element, opts) or_return
+
+ case:
+ //error(t, t.offset, "Unexpected Token after <!: %#v", next)
+ }
+
+ } else if open.kind == .Question {
+ // <?xml
+ next := scan(t)
+ #partial switch next.kind {
+ case .Ident:
+ if len(next.text) == 3 && strings.equal_fold(next.text, "xml") {
+ parse_prologue(doc) or_return
+ } else if len(doc.prologue) > 0 {
+ // We've already seen a prologue.
+ return doc, .Too_Many_Prologs
+ } else {
+ // Could be `<?xml-stylesheet`, etc. Ignore it.
+ skip_element(t) or_return
+ }
+ case:
+ //error(t, t.offset, "Expected \"<?xml\", got \"<?%v\".", next.text)
+ return
+ }
+
+ } else {
+ //error(t, t.offset, "Invalid Token after <: %#v\n", open)
+ return
+ }
+
+ case -1:
+ // End of file.
+ break loop
+
+ case:
+ // This should be a tag's body text.
+ parse_body(doc, element, opts) or_return
+ }
+ }
+
+ if .Must_Have_Prolog in opts.flags && len(doc.prologue) == 0 {
+ return doc, .No_Prolog
+ }
+
+ if .Must_Have_DocType in opts.flags && len(doc.doctype.ident) == 0 {
+ return doc, .No_DocType
+ }
+
+ resize(&doc.elements, int(doc.element_count))
+ return doc, .None
+}
+
+parse_string :: proc(data: string, options := DEFAULT_OPTIONS, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
+ _data := transmute([]u8)data
+
+ return parse_bytes(_data, options, path, error_handler, allocator)
+}
+
+parse :: proc { parse_string, parse_bytes }
+
+// Load an XML file
+load_from_file :: proc(filename: string, options := DEFAULT_OPTIONS, error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) {
+ context.allocator = allocator
+ options := options
+
+ data, data_ok := os.read_entire_file(filename)
+ if !data_ok { return {}, .File_Error }
+
+ options.flags += { .Input_May_Be_Modified }
+
+ return parse_bytes(data, options, filename, error_handler, allocator)
+}
+
+destroy :: proc(doc: ^Document) {
+ if doc == nil { return }
+
+ for el in doc.elements {
+ delete(el.attribs)
+ delete(el.value)
+ }
+ delete(doc.elements)
+
+ delete(doc.prologue)
+ delete(doc.comments)
+ delete(doc.input)
+
+ for s in doc.strings_to_free {
+ delete(s)
+ }
+ delete(doc.strings_to_free)
+
+ free(doc.tokenizer)
+ free(doc)
+}
+
+/*
+ Helpers.
+*/
+
+validate_options :: proc(options: Options) -> (validated: Options, err: Error) {
+ validated = options
+
+ if .Error_on_Unsupported in validated.flags && .Ignore_Unsupported in validated.flags {
+ return options, .Conflicting_Options
+ }
+ return validated, .None
+}
+
+expect :: proc(t: ^Tokenizer, kind: Token_Kind, multiline_string := false) -> (tok: Token, err: Error) {
+ tok = scan(t, multiline_string=multiline_string)
+ if tok.kind == kind { return tok, .None }
+
+ //error(t, t.offset, "Expected \"%v\", got \"%v\".", kind, tok.kind)
+ return tok, .Unexpected_Token
+}
+
+parse_attribute :: proc(doc: ^Document) -> (attr: Attribute, offset: int, err: Error) {
+ assert(doc != nil)
+ context.allocator = doc.allocator
+ t := doc.tokenizer
+
+ key := expect(t, .Ident) or_return
+ _ = expect(t, .Eq) or_return
+ value := expect(t, .String, multiline_string=true) or_return
+
+ normalized, normalize_err := entity.decode_xml(value.text, {.Normalize_Whitespace}, doc.allocator)
+ if normalize_err == .None {
+ append(&doc.strings_to_free, normalized)
+ value.text = normalized
+ }
+
+ attr.key = key.text
+ attr.val = value.text
+
+ err = .None
+ return
+}
+
+check_duplicate_attributes :: proc(t: ^Tokenizer, attribs: Attributes, attr: Attribute, offset: int) -> (err: Error) {
+ for a in attribs {
+ if attr.key == a.key {
+ //error(t, offset, "Duplicate attribute: %v\n", attr.key)
+ return .Duplicate_Attribute
+ }
+ }
+ return .None
+}
+
+parse_attributes :: proc(doc: ^Document, attribs: ^Attributes) -> (err: Error) {
+ assert(doc != nil)
+ context.allocator = doc.allocator
+ t := doc.tokenizer
+
+ for peek(t).kind == .Ident {
+ attr, offset := parse_attribute(doc) or_return
+ check_duplicate_attributes(t, attribs^, attr, offset) or_return
+ append(attribs, attr)
+ }
+ skip_whitespace(t)
+ return .None
+}
+
+parse_prologue :: proc(doc: ^Document) -> (err: Error) {
+ assert(doc != nil)
+ context.allocator = doc.allocator
+ t := doc.tokenizer
+
+ offset := t.offset
+ parse_attributes(doc, &doc.prologue) or_return
+
+ for attr in doc.prologue {
+ switch attr.key {
+ case "version":
+ switch attr.val {
+ case "1.0", "1.1":
+ case:
+ //error(t, offset, "[parse_prologue] Warning: Unhandled XML version: %v\n", attr.val)
+ }
+
+ case "encoding":
+ runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD()
+ switch strings.to_lower(attr.val, context.temp_allocator) {
+ case "utf-8", "utf8":
+ doc.encoding = .UTF_8
+
+ case "latin-1", "latin1", "iso-8859-1":
+ doc.encoding = .LATIN_1
+
+ case:
+ // Unrecognized encoding, assume UTF-8.
+ //error(t, offset, "[parse_prologue] Warning: Unrecognized encoding: %v\n", attr.val)
+ }
+
+ case:
+ // Ignored.
+ }
+ }
+
+ _ = expect(t, .Question) or_return
+ _ = expect(t, .Gt) or_return
+
+ return .None
+}
+
+skip_element :: proc(t: ^Tokenizer) -> (err: Error) {
+ close := 1
+
+ loop: for {
+ tok := scan(t)
+ #partial switch tok.kind {
+ case .EOF:
+ //error(t, t.offset, "[skip_element] Premature EOF\n")
+ return .Premature_EOF
+
+ case .Lt:
+ close += 1
+
+ case .Gt:
+ close -= 1
+ if close == 0 {
+ break loop
+ }
+
+ case:
+
+ }
+ }
+ return .None
+}
+
+parse_doctype :: proc(doc: ^Document) -> (err: Error) {
+ /*
+ <!DOCTYPE greeting SYSTEM "hello.dtd">
+
+ <!DOCTYPE greeting [
+ <!ELEMENT greeting (#PCDATA)>
+ ]>
+ */
+ assert(doc != nil)
+ context.allocator = doc.allocator
+ t := doc.tokenizer
+
+ tok := expect(t, .Ident) or_return
+ doc.doctype.ident = tok.text
+
+ skip_whitespace(t)
+ offset := t.offset
+ skip_element(t) or_return
+
+ // -1 because the current offset is that of the closing tag, so the rest of the DOCTYPE tag ends just before it.
+ doc.doctype.rest = string(t.src[offset : t.offset - 1])
+ return .None
+}
+
+parse_body :: proc(doc: ^Document, element: Element_ID, opts: Options) -> (err: Error) {
+ assert(doc != nil)
+ context.allocator = doc.allocator
+ t := doc.tokenizer
+
+ body_text := scan_string(t, t.offset) or_return
+ needs_processing := .Unbox_CDATA in opts.flags
+ needs_processing |= .Decode_SGML_Entities in opts.flags
+
+ if !needs_processing {
+ append(&doc.elements[element].value, body_text)
+ return
+ }
+
+ decode_opts := entity.XML_Decode_Options{}
+ if .Keep_Tag_Body_Comments not_in opts.flags {
+ decode_opts += { .Comment_Strip }
+ }
+
+ if .Decode_SGML_Entities not_in opts.flags {
+ decode_opts += { .No_Entity_Decode }
+ }
+
+ if .Unbox_CDATA in opts.flags {
+ decode_opts += { .Unbox_CDATA }
+ if .Decode_SGML_Entities in opts.flags {
+ decode_opts += { .Decode_CDATA }
+ }
+ }
+
+ decoded, decode_err := entity.decode_xml(body_text, decode_opts)
+ if decode_err == .None {
+ append(&doc.elements[element].value, decoded)
+ append(&doc.strings_to_free, decoded)
+ } else {
+ append(&doc.elements[element].value, body_text)
+ }
+
+ return
+}
+
+Element_ID :: u32
+
+new_element :: proc(doc: ^Document) -> (id: Element_ID) {
+ element_space := len(doc.elements)
+
+ // Need to resize
+ if int(doc.element_count) + 1 > element_space {
+ if element_space < 65536 {
+ element_space *= 2
+ } else {
+ element_space += 65536
+ }
+ resize(&doc.elements, element_space)
+ }
+
+ cur := doc.element_count
+ doc.element_count += 1
+ return cur
+} \ No newline at end of file