| author | San Jacobs | 2025-12-11 20:44:02 +0100 |
|---|---|---|
| committer | San Jacobs | 2025-12-11 20:44:02 +0100 |
| commit | cb7ef81fd339199c69eccd93105c13d2a1f41f71 (patch) | |
| tree | 7109109bd690e91ff74ef2c09605e5626c1d9179 /src | |
| parent | 00121d7c14a3bfa03c5eeb6c28b5edf060baf029 (diff) | |
| download | better-report-cb7ef81fd339199c69eccd93105c13d2a1f41f71.tar.gz better-report-cb7ef81fd339199c69eccd93105c13d2a1f41f71.tar.bz2 better-report-cb7ef81fd339199c69eccd93105c13d2a1f41f71.zip | |
We can now generate reports straight from the WAVs! No CSV needed!
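In practice, the no-CSV path means report rows come straight out of the WAV headers. A minimal sketch of the idea, assuming the src/wav package added in this commit (the wrapper file and the output formatting here are illustrative; the real wiring lives in src/main.odin):

package example

import "core:fmt"
import "wav" // src/wav from this commit

main :: proc() {
    // wav.read parses the RIFF chunks (fmt, bext, iXML) for metadata only;
    // the sample data itself is never loaded into RAM.
    w, ok := wav.read("test/WAVs/F8-SL098-T001.WAV")
    if !ok {
        fmt.println("could not read WAV metadata")
        return
    }
    fmt.printf("Scene %v, T%03d, TC %02d:%02d:%02d\n",
        w.scene, w.take,
        w.timecode.hour, w.timecode.minute, w.timecode.second)
}

parse_folder() assembles one such row per WAV and builds the report table from them.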
Diffstat (limited to 'src')
| -rw-r--r-- | src/header_template.txt | 6 |
| -rw-r--r-- | src/main.odin | 410 |
| -rw-r--r-- | src/wav/wav.odin | 103 |
| -rw-r--r-- | src/wav/xml/debug_print.odin | 86 |
| -rw-r--r-- | src/wav/xml/doc.odin | 23 |
| -rw-r--r-- | src/wav/xml/helpers.odin | 52 |
| -rw-r--r-- | src/wav/xml/tokenizer.odin | 415 |
| -rw-r--r-- | src/wav/xml/xml_reader.odin | 628 |
8 files changed, 1600 insertions, 123 deletions
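Before the file-by-file diff, the key structural change in src/main.odin: the job list now holds a union, so a single CSV and a folder's worth of WAVs flow through the same loop. A stripped-down sketch of that dispatch, using the type names from the diff (the paths and print statements are placeholders):

package example

import "core:fmt"

CSV :: string                // path to a recorder-generated sound report
Directory :: [dynamic]string // the .wav files collected from one folder

Job :: union {CSV, Directory}

main :: proc() {
    jobs: [dynamic]Job
    append(&jobs, CSV("day1/report.csv")) // hypothetical paths
    append(&jobs, Directory{"day2/sc01t01.wav", "day2/sc01t02.wav"})

    for job in jobs {
        // The type switch picks the parser, as in main.odin's job loop.
        switch file in job {
        case CSV:
            fmt.println("would parse_file:", file)
        case Directory:
            fmt.printf("would parse_folder over %d WAV(s)\n", len(file))
        }
    }
}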
diff --git a/src/header_template.txt b/src/header_template.txt index 8d42a55..cc4477a 100644 --- a/src/header_template.txt +++ b/src/header_template.txt @@ -1,14 +1,14 @@ # Header info fields # # In this file, you can write things you want to appear in the header of the sound report. -# These fields will be inserted into sound reports generated directly from wav metadata. +# These fields will only be inserted into sound reports generated directly from wav metadata. # This happens when you submit a folder with .WAV-files that doesn't contain a CSV file. # # Empty lines, and lines starting with # are ignored. -# No need to add facts about the files, those are added automatically from metadata. +# Don't add facts about the files, those are added automatically from metadata. +# This includes project name and tape, which are generally recorded in the files themselves. Sound Mixer: Ola Nordmann Phone: +0 123 45 678 Email: ola@nordmann.no Boom Operator: Sven Svensson -Project: Project Name diff --git a/src/main.odin b/src/main.odin index c0f66ba..7255dd6 100644 --- a/src/main.odin +++ b/src/main.odin @@ -6,6 +6,7 @@ import "core:os/os2" import "core:path/filepath" import "core:sys/windows" import "core:strings" +import "core:math" import "wav" /* @@ -45,6 +46,7 @@ Info_Line :: struct { } Report :: struct { + // Content title : string, info_lines : []Info_Line, header : []string, @@ -53,12 +55,15 @@ Report :: struct { row_count : int, info_line_count : int, tc_column_index : int, + + // Meta + path : string, } CSV :: string Directory :: [dynamic]string Job :: union {CSV, Directory} -job_list :: [dynamic]Job +job_list : [dynamic]Job // TODO: Changing file_list to job_list, so the Directory jobs can contain a list of all the relevant .wav files before being sent to parse_folder() main :: proc() { @@ -92,33 +97,48 @@ main :: proc() { if(path_info.is_dir) { fmt.printf("Directory submitted! Walking directory...\n\n") fmt.printf("š {}\n", path_info.name) - try_os2 := walk_directory(path_info.fullpath, &file_count, &job_list, 1) + try_os2 := walk_directory(path_info.fullpath, &file_count, 1) if len(job_list) == 0 && try_os2 { fmt.printf("\nNot_Dir error encountered. Trying os2 version...\n\n") fmt.printf("š {}\n", path_info.name) - walk_directory_os2(path_info.fullpath, &file_count, &job_list, 1) + walk_directory_os2(path_info.fullpath, &file_count, 1) } } else { fmt.println("File submitted! 
Processing file...") - append(&job_list, strings.clone(path_info.fullpath)) + append(&job_list, CSV(strings.clone(path_info.fullpath))) } - for file, f in job_list { + for job, i in job_list { - file_info, _ := os.stat(file) - fmt.printf("\nš File {}: {}\n", f+1, file_info.name) - parsed, ok_parse := parse(file_info.fullpath) - if !ok_parse { - fmt.printf("Parse failed: {}\n", file_info.fullpath) - continue + parsed : Report + parse_ok : bool + switch file in job { + case CSV: + file_info, _ := os.stat(file) + fmt.printf("\nš File {}: {}\n", i+1, file_info.name) + parsed, parse_ok = parse_file(file_info.fullpath) + if !parse_ok { + fmt.printf("Parse failed: {}\n", file_info.fullpath) + continue + } + case Directory: + fmt.printf("\nš Folder {}: ", i+1) + parsed, parse_ok = parse_folder(file) + fmt.printf("{}", parsed.title) + if parse_ok { + fmt.printf("\nParsed %d WAV(s).\n", parsed.row_count) + } else { + file_info, _ := os.stat(file[0]) + fmt.printf("\nParse failed: {}\n", file_info.fullpath) + continue + } } - output_name := fmt.aprintf("{}/{}_Knekt_Lydrapport.html", filepath.dir(file_info.fullpath), parsed.title, allocator=context.temp_allocator) - render(parsed, output_name) + render(parsed) free_all(context.temp_allocator) files_done += 1 } - fmt.printf("\nCompleted {}/{} files.\n\n", files_done, len(job_list)) + fmt.printf("\nCompleted {}/{} job(s).\n\n", files_done, len(job_list)) } else { fmt.printf("ERROR could not get path info for: {}\n", input_file_name) } @@ -126,19 +146,253 @@ main :: proc() { } -parse :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { - if os.is_dir(path) { - return parse_folder(path) +parse_folder :: proc(paths : Directory) -> (Report, bool) { + + // 888 888 888 8888b. 888 888 + // 888 888 888 "88b 888 888 + // 888 888 888 .d888888 Y88 88P + // d8b Y88b 888 d88P 888 888 Y8bd8P + // Y8P "Y8888888P" "Y888888 Y88P + + output : Report = {} + + wavs : [dynamic]wav.Wav + + max_channels := 0 + for path, i in paths { + w, ok := wav.read(path) + if ok { + append(&wavs, w) + max_channels = max(max_channels, w.channels) + } + } + + header_build : [dynamic]string + append(&header_build, "Circled") + append(&header_build, "File Name") + append(&header_build, "Scene") + append(&header_build, "Take") + append(&header_build, "Timecode") + append(&header_build, "TC FPS") + append(&header_build, "User Bits") + append(&header_build, "Tape") + append(&header_build, "Date") + append(&header_build, "Project") + append(&header_build, "Sample Rate") + append(&header_build, "Format") // Bit depth and int vs float + first_channel_index := len(header_build) + last_channel_index := -1 + for i in 0..<max_channels { + track_title := fmt.aprintf("Track %d", i+1) + last_channel_index = len(header_build) + append(&header_build, track_title) + } + append(&header_build, "Note") + output.header = header_build[:] + + output.column_count = len(header_build) + output.row_count = len(wavs) + + output.table = make([][]string, output.row_count, context.temp_allocator) + for &row in output.table { + row = make([]string, output.column_count, context.temp_allocator) + } + + output.info_lines = make([]Info_Line, 64, context.temp_allocator) + + info_txt, info_txt_ok := os.read_entire_file(HEADER_FIELDS_PATH, context.temp_allocator) + if info_txt_ok { + it := string(info_txt) + line_index := 0 + for line in strings.split_lines_iterator(&it) { + if strings.starts_with(line, "#") { + continue + } + if len(line)<2 { + continue + } + colon := strings.index_rune(line, ':') + if colon==-1 { 
+ continue + } + CUTSET :: " " + output.info_lines[line_index].field = strings.trim(line[:colon+1], CUTSET) + output.info_lines[line_index].entry = strings.trim(line[colon+1:], CUTSET) + line_index += 1 + } + output.info_lines[line_index].field = " " + output.info_lines[line_index].entry = "- - - - -" + line_index += 1 + output.info_line_count = line_index } - return parse_file(path, device) -} -parse_folder :: proc(path : string) -> (Report, bool) { - output : Report = {} - return output, false + + // Populating the table with data + + for w, i in wavs { + row := output.table[i] + stat, _ := os.stat(w.path, allocator=context.temp_allocator) + + for name, i in w.channel_names { + row[first_channel_index + i] = name + } + for title, i in output.header { + switch title { + case "File Name": + row[i] = stat.name + case "Scene": + row[i] = w.scene + case "Take": + row[i] = fmt.tprintf("T%03d", w.take) + case "Timecode": + row[i] = fmt.tprintf("%02d:%02d:%02d:%02d", // Timecode + w.timecode.hour, + w.timecode.minute, + w.timecode.second, + int(math.round(w.timecode.frame))) + case "TC FPS": + if w.tc_dropframe { // TC FPS + row[i] = fmt.tprintf("%.03f DF", w.tc_framerate) + } else { + row[i] = fmt.tprintf("%.03f ND", w.tc_framerate) + } + case "User Bits": + if w.ubits != {0,0,0,0,0,0,0,0,} { + row[i] = fmt.tprintf("%d%d%d%d%d%d%d%d", expand_values(w.ubits)) + } + case "Tape": + row[i] = w.tape + case "Date": + row[i] = fmt.tprintf("%04d-%02d-%02d", expand_values(w.date)) + case "Project": + row[i] = w.project + case "Sample Rate": + row[i] = fmt.tprintf("%d Hz", w.sample_rate) + case "Format": + switch w.format { // "Format", aka bit depth + int vs float + case .INT: + row[i] = fmt.tprintf("%d-bit int", w.bit_depth) + case .FLOAT: + row[i] = fmt.tprintf("%d-bit float", w.bit_depth) + } + case "Circled": + if w.circled do row[i] = "O" + case "Note": + row[i] = w.note + } + } + } + + + + // Cleanup! 
+ when VERBOSE do fmt.printf("Struct before cleanup:\n%#v\n", output) + + // Stacking tracks to the left + for &line, l in output.table { + stacking_index := first_channel_index + for &field, f in line[first_channel_index:last_channel_index+1] { + if field != "" { + line[stacking_index] = field + stacking_index += 1 + } + } + for &field, f in line[stacking_index:last_channel_index+1] { + field = "" + } + } + + + // Cleaning out unused columns + touched := make([]bool, output.column_count, context.temp_allocator) + // Finding them + for line, l in output.table { + for field, f in line { + if touched[f] do continue + if field != "" { + touched[f] = true + } + } + } + + // Turning unchanging columns into info line + changed := make([]bool, output.column_count, context.temp_allocator) + prev_line : []string = nil + for line, l in output.table { + if l>0 { + prev_line = output.table[l - 1] + for field, f in line { + if (prev_line[f] != field) || + (first_channel_index <= f && f <= last_channel_index) || + (f == output.tc_column_index) { + changed[f] = true + } + } + } + } + for did_change, i in changed { + if (!did_change) && touched[i] { + field := fmt.aprintf("{}: ", output.header[i], allocator=context.temp_allocator) + entry := prev_line[i] + output.info_lines[output.info_line_count] = {field=field, entry=entry} + output.info_line_count += 1 + } + } + + + // Removing unused and static + for &line, l in output.table { + stacking_index := 0 + for &field, f in line { + if touched[f] && changed[f] { + line[stacking_index] = field + stacking_index += 1 + } + } + for &field, f in line[stacking_index:] { + field = "" + } + } + stacking_index := 0 + for &field, f in output.header { + if touched[f] && changed[f] { + output.header[stacking_index] = field + stacking_index += 1 + } + } + for &field, f in output.header[stacking_index:] { + field = "" + } + + output.column_count = stacking_index + + // Setting title for report + output.title = strings.trim(filepath.base(filepath.dir(paths[0])), "/\\") + for item in output.info_lines { + if item.field == "Tape" { + output.title = item.entry + } + } + + // Setting column to sort by + for title, i in output.header { + if title == "Timecode" { + output.tc_column_index = i + break + } + } + + when VERBOSE do fmt.printf("Struct before output:\n%#v\n", output) + + output.path = fmt.tprintf("{}/{}_Knekt_Lydrapport.html", filepath.dir(paths[0]), output.title) + + return output, true } -parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { + + + +parse_file :: proc(path : CSV, device : Device = .UNSET) -> (Report, bool) { device := device output : Report = {} data, ok := os.read_entire_file(path, context.temp_allocator) @@ -158,20 +412,20 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { if (device!=.UNSET) { break } if line == "\"SOUND REPORT\"," { device = .ZOOM - if VERBOSE do fmt.printf("Detected ZOOM from quotes and comma on line index {}\n", line_number) + when VERBOSE do fmt.printf("Detected ZOOM from quotes and comma on line index {}\n", line_number) } if line == "\"ZOOM F8\"," { device = .ZOOM - if VERBOSE do fmt.printf("Detected ZOOM from \"ZOOM F8\" on line index {}\n", line_number) + when VERBOSE do fmt.printf("Detected ZOOM from \"ZOOM F8\" on line index {}\n", line_number) } if line == "SOUND REPORT" { device = .SD6 - if VERBOSE do fmt.printf("Detected SOUND_DEVICES from unquoted SOUND REPORT line index {}\n", line_number) + when VERBOSE do fmt.printf("Detected SOUND_DEVICES from unquoted 
SOUND REPORT line index {}\n", line_number) } if len(line)<15 do continue if line[:13] == "SOUND REPORT," { device = .SD8 - if VERBOSE do fmt.printf("Detected SOUND_DEVICES 8-series from SOUND REPORT with missing newline on line index {}\n", line_number) + when VERBOSE do fmt.printf("Detected SOUND_DEVICES 8-series from SOUND REPORT with missing newline on line index {}\n", line_number) } } @@ -271,7 +525,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { // STAGE 3 -------------------------------------------------------------- // Filling with data - if VERBOSE do fmt.printf("Struct before main parse:\n%#v\n", output) + when VERBOSE do fmt.printf("Struct before main parse:\n%#v\n", output) first_channel_index := -1 last_channel_index := -1 @@ -304,7 +558,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { stage = .INFO line_elements := strings.split(line, ",") - if VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements) + when VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements) field := fmt.aprintf("{}:", line_elements[1], allocator=context.temp_allocator) entry := line_elements[2] output.info_lines[info_line_index].field = field @@ -318,9 +572,9 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { continue } line_elements := strings.split(line, ",") - if VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements) + when VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements) if line_elements[0] == "Date" { - if VERBOSE do fmt.printf("Skipping line {}, because it's the retarded date field on an 8-series\n", line_index) + when VERBOSE do fmt.printf("Skipping line {}, because it's the retarded date field on an 8-series\n", line_index) output.info_line_count -= 1 continue } @@ -335,10 +589,10 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { if line == "," { continue // This is here because there are a bunch of lines that are just commas before the header } else if len(line)>3 { - if VERBOSE do fmt.printf(".HEADER {}:", line_index) + when VERBOSE do fmt.printf(".HEADER {}:", line_index) // No trailing comma in the header?? 
for element, e in strings.split(line, ",") { - if VERBOSE do fmt.printf(" {}", element) + when VERBOSE do fmt.printf(" {}", element) output.header[e] = element if element[:3] == "Trk" { @@ -351,22 +605,22 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { } } - if VERBOSE do fmt.printf("\n") - if VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index) - if VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index) + when VERBOSE do fmt.printf("\n") + when VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index) + when VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index) stage = .BODY } case .BODY: if len(line) > 2 { - if VERBOSE do fmt.printf(".BODY {}:", line_index) + when VERBOSE do fmt.printf(".BODY {}:", line_index) for element, e in strings.split(line, ",") { - if VERBOSE do fmt.printf(" {}", element) + when VERBOSE do fmt.printf(" {}", element) entry : string = element output.table[body_line_index][e] = entry } - if VERBOSE do fmt.printf("\n") + when VERBOSE do fmt.printf("\n") body_line_index += 1 } @@ -409,7 +663,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { continue } line_elements := strings.split(line, ",") - if VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements) + when VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements) field := line_elements[0] entry_raw := line_elements[1] entry := line_elements[1][1:len(entry_raw)-1] @@ -422,10 +676,10 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { if line == "," { // This is here because there are a bunch of lines that are just commas before the header } else if len(line)>3 { - if VERBOSE do fmt.printf(".HEADER {}:", line_index) + when VERBOSE do fmt.printf(".HEADER {}:", line_index) // No trailing comma in the header?? 
for element, e in strings.split(line, ",") { - if VERBOSE do fmt.printf(" {}", element) + when VERBOSE do fmt.printf(" {}", element) output.header[e] = element if element[:4] == "Trk " { @@ -438,20 +692,20 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { } } - if VERBOSE do fmt.printf("\n") + when VERBOSE do fmt.printf("\n") } else if line == "" { stage = .BODY - if VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index) - if VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index) + when VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index) + when VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index) } case .BODY: if len(line) > 2 { - if VERBOSE do fmt.printf(".BODY {}:", line_index) + when VERBOSE do fmt.printf(".BODY {}:", line_index) // to skip empty entry after trailing comma we do a silly slice for element, e in strings.split(line, ",")[:output.column_count] { - if VERBOSE do fmt.printf(" {}", element) + when VERBOSE do fmt.printf(" {}", element) entry : string = element // Stripping quotes if after tracks begin if e >= first_channel_index && (len(element)>0) { @@ -459,7 +713,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { } output.table[body_line_index][e] = entry } - if VERBOSE do fmt.printf("\n") + when VERBOSE do fmt.printf("\n") body_line_index += 1 } @@ -506,7 +760,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { continue } line_elements := strings.split(line, ",") - if VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements) + when VERBOSE do fmt.printf(".INFO {}: {}\n", line_index, line_elements) field_raw := line_elements[0] entry_raw := line_elements[1] field := line_elements[0][1:len(field_raw)-1] @@ -516,10 +770,10 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { info_line_index += 1 case .HEADER: - if VERBOSE do fmt.printf(".HEADER {}:", line_index) + when VERBOSE do fmt.printf(".HEADER {}:", line_index) // to skip empty entry after trailing comma we do a silly slice for element, e in strings.split(line, ",")[:output.column_count] { - if VERBOSE do fmt.printf(" {}", element) + when VERBOSE do fmt.printf(" {}", element) output.header[e] = element[1:len(element)-1] if element[:4] == "\"Tr " { @@ -531,20 +785,20 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { output.tc_column_index = e } } - if VERBOSE do fmt.printf("\n") + when VERBOSE do fmt.printf("\n") stage = .BODY - if VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index) - if VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index) + when VERBOSE do fmt.printf("first_channel_index: {}\n", first_channel_index) + when VERBOSE do fmt.printf("last_channel_index: {}\n", last_channel_index) case .BODY: if line == "" do break - if VERBOSE do fmt.printf(".BODY {}:", line_index) + when VERBOSE do fmt.printf(".BODY {}:", line_index) // to skip empty entry after trailing comma we do a silly slice for element, e in strings.split(line, ",")[:output.column_count] { - if VERBOSE do fmt.printf(" {}", element) + when VERBOSE do fmt.printf(" {}", element) output.table[body_line_index][e] = element[1:len(element)-1] } - if VERBOSE do fmt.printf("\n") + when VERBOSE do fmt.printf("\n") body_line_index += 1 } } @@ -555,7 +809,7 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { // STAGE 4 
-------------------------------------------------------------- // Cleanup! - if VERBOSE do fmt.printf("Struct before cleanup:\n%#v\n", output) + when VERBOSE do fmt.printf("Struct before cleanup:\n%#v\n", output) // Stacking tracks to the left for &line, l in output.table { @@ -635,13 +889,15 @@ parse_file :: proc(path : string, device : Device = .UNSET) -> (Report, bool) { output.column_count = stacking_index - if VERBOSE do fmt.printf("Struct before output:\n%#v\n", output) + when VERBOSE do fmt.printf("Struct before output:\n%#v\n", output) + + output.path = fmt.tprintf("{}/{}_Knekt_Lydrapport.html", filepath.dir(path), output.title) return output, true } -render :: proc(report : Report, path : string) { +render :: proc(report : Report) { // Now we output the HTML. builder := strings.builder_make(context.temp_allocator) @@ -687,9 +943,9 @@ render :: proc(report : Report, path : string) { strings.write_string(&builder, PART_END) output_text := strings.to_string(builder) - os.write_entire_file(path, transmute([]u8)output_text) + os.write_entire_file(report.path, transmute([]u8)output_text) - fmt.printf("Output: {}\n", path) + fmt.printf("Output: {}\n", report.path) } indent_by :: proc(i : int) { @@ -698,7 +954,7 @@ indent_by :: proc(i : int) { } } -walk_directory :: proc(path : string, file_number : ^int, job_list : ^[dynamic]string, depth : int = 0) -> bool { +walk_directory :: proc(path : string, file_number : ^int, depth : int = 0) -> bool { handle, ok := os.open(path) if ok != os.ERROR_NONE { indent_by(depth) @@ -715,7 +971,7 @@ walk_directory :: proc(path : string, file_number : ^int, job_list : ^[dynamic]s return true } - wav_count := 0 + wav_files : [dynamic]string has_csv := false for file in files { @@ -725,7 +981,7 @@ walk_directory :: proc(path : string, file_number : ^int, job_list : ^[dynamic]s if file.is_dir { indent_by(depth) fmt.printf("š %s\n", file.name) - walk_directory(full_path, file_number, job_list, depth+1) // Recurse + walk_directory(full_path, file_number, depth+1) // Recurse } else { // If file is actually a file @@ -734,29 +990,30 @@ walk_directory :: proc(path : string, file_number : ^int, job_list : ^[dynamic]s if extension == ".csv" { indent_by(depth) fmt.printf("š [#%d] %s\n", file_number^, file.name) - append(job_list, strings.clone(file.fullpath)) + append(&job_list, strings.clone(file.fullpath)) file_number^ += 1 has_csv = true } if extension == ".wav" { - wav_count += 1 + append(&wav_files, strings.clone(full_path)) } } } + wav_count := len(wav_files) if wav_count>0 && !has_csv { - indent_by(depth+1) + indent_by(depth) if wav_count == 1 { - fmt.printf("š½ [#%d] 1 WAV file.\n", file_number^) + fmt.printf("š½ [#%d] A WAV file.\n", file_number^) } else { fmt.printf("š½ [#%d] %d WAV files.\n", file_number^, wav_count) } - append(job_list, strings.clone(path)) + append(&job_list, wav_files) file_number^ += 1 } return false } -walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynamic]string, depth : int = 0) { +walk_directory_os2 :: proc(path : string, file_number : ^int, depth : int = 0) { handle, ok := os2.open(path) if ok != os2.ERROR_NONE { indent_by(depth) @@ -772,7 +1029,7 @@ walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynam return } - wav_count := 0 + wav_files : [dynamic]string has_csv := false for file in files { @@ -782,7 +1039,7 @@ walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynam if os.is_dir(full_path) { indent_by(depth) fmt.printf("š %s\n", file.name) - 
walk_directory_os2(full_path, file_number, job_list, depth+1) // Recurse + walk_directory_os2(full_path, file_number, depth+1) // Recurse } else { // If file is actually a file @@ -791,15 +1048,16 @@ walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynam if extension == ".csv" { indent_by(depth) fmt.printf("š [#%d] %s\n", file_number^, file.name) - append(job_list, strings.clone(file.fullpath)) + append(&job_list, strings.clone(file.fullpath)) file_number^ += 1 has_csv = true } if extension == ".wav" { - wav_count += 1 + append(&wav_files, strings.clone(full_path)) } } } + wav_count := len(wav_files) if wav_count>0 && !has_csv { indent_by(depth+1) if wav_count == 1 { @@ -807,7 +1065,7 @@ walk_directory_os2 :: proc(path : string, file_number : ^int, job_list : ^[dynam } else { fmt.printf("š½ [#%d] %d WAV files.\n", file_number^, wav_count) } - append(job_list, strings.clone(path)) + append(&job_list, wav_files) file_number^ += 1 } }
\ No newline at end of file diff --git a/src/wav/wav.odin b/src/wav/wav.odin index 01fbad1..41667b5 100644 --- a/src/wav/wav.odin +++ b/src/wav/wav.odin @@ -5,22 +5,25 @@ import "core:math" import "core:strings" import "core:strconv" import "core:os" -import "core:encoding/xml" +import "xml" Wav :: struct { // Basic data path : string, - handle : os.Handle, format : Audio_Format, channels : int, sample_rate : int, bit_depth : int, reported_size : u32, + // Internals + handle : os.Handle, + // Metadata + date : Date, channel_names : []string, samples_since_midnight: u64, - timecode : Timecode, + timecode : Timecode, // Derived from samples_since_midnight tc_framerate : f32, tc_dropframe : bool, ubits : [8]u8, @@ -32,7 +35,7 @@ Wav :: struct { circled : bool, } Audio_Format :: enum { - PCM = 1, + INT = 1, FLOAT = 3, } Timecode :: struct { @@ -41,23 +44,29 @@ Timecode :: struct { second : u8, frame : f32, } +Date :: struct { + year, month, day : int, +} +VERBOSE :: false BUFFER_SIZE :: 1<<15 main :: proc() { - enok, enok_ok := read_wav("test/ENOKS-BIRHTDAYT02.WAV", context.temp_allocator) - fmt.printf("\n\nenok = %#v\n\n", enok) - prins, prins_ok := read_wav("test/KRONPRINS01T01.wav", context.temp_allocator) - fmt.printf("\n\nprins = %#v\n\n", prins) - f8, f8_ok := read_wav("test/F8-SL098-T001.WAV", context.temp_allocator) - fmt.printf("\n\nf8 = %#v\n\n", f8) + // Test + enok, enok_ok := read("test/WAVs/ENOKS-BIRHTDAYT02.WAV", context.temp_allocator) + when VERBOSE do fmt.printf("\n\nenok = %#v\n\n", enok) + prins, prins_ok := read("test/WAVs/KRONPRINS01T01.wav", context.temp_allocator) + when VERBOSE do fmt.printf("\n\nprins = %#v\n\n", prins) + f8, f8_ok := read("test/WAVs/F8-SL098-T001.WAV", context.temp_allocator) + when VERBOSE do fmt.printf("\n\nf8 = %#v\n\n", f8) } /* -Reads in the wav file data, including metadata. +Reads in the wav file metadata, without loading the sound data into ram. 
*/ -read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { +read :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) #optional_ok { file : Wav + file.path = path load_err : os.Error file.handle, load_err = os.open(path) @@ -79,21 +88,21 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { head : int = 0 // RIFF header - fmt.println(string(temp_buf[0:4])) + when VERBOSE do fmt.println(string(temp_buf[0:4])) if string(temp_buf[0:4]) != "RIFF" do return {}, false head += 4 // Size file.reported_size = read_little_endian_u32(temp_buf[head:head+4]) - fmt.println("Reported size:", file.reported_size) + when VERBOSE do fmt.println("Reported size:", file.reported_size) head += 4 // Confirming again that this is a wave file - fmt.println(string(temp_buf[head:head+4])) + when VERBOSE do fmt.println(string(temp_buf[head:head+4])) if string(temp_buf[head:head+4]) != "WAVE" do return {}, false head += 4 - fmt.println("\nChunks:\n") + when VERBOSE do fmt.println("\nChunks:\n") // Looping through chunks null_chunks := 0 @@ -103,7 +112,7 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { head += 4 chunk_size := int(read_little_endian_u32(temp_buf[head:head+4])) head += 4 - fmt.println(chunk_id, chunk_size,"\n-------------------------------------") + when VERBOSE do fmt.println(chunk_id, chunk_size,"\n-------------------------------------") data_reached := false next_chunk_start := head + chunk_size @@ -122,13 +131,13 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { null_chunks = 0 case "fmt ": file.format = Audio_Format(read_little_endian_u16(temp_buf[head:])) - fmt.println("Format:", file.format) + when VERBOSE do fmt.println("Format:", file.format) head += 2 file.channels = int(read_little_endian_u16(temp_buf[head:])) - fmt.println("Channels:", file.channels) + when VERBOSE do fmt.println("Channels:", file.channels) head += 2 file.sample_rate = int(read_little_endian_u32(temp_buf[head:])) - fmt.println("Sample rate:", file.sample_rate) + when VERBOSE do fmt.println("Sample rate:", file.sample_rate) head += 4 // Skipping byte rate and block align. @@ -137,7 +146,7 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { head += 4 + 2 file.bit_depth = int(read_little_endian_u16(temp_buf[head:])) - fmt.println("Bit depth:", file.bit_depth) + when VERBOSE do fmt.println("Bit depth:", file.bit_depth) head += 2 head = data_end null_chunks = 0 @@ -148,9 +157,9 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { null_chunks += 1 } } - fmt.println(print_data, "\n") + when VERBOSE do fmt.println(print_data, "\n") } else { - fmt.println("End of buffer reached.") + when VERBOSE do fmt.println("End of buffer reached.") break } @@ -158,11 +167,11 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { head = next_chunk_start if null_chunks > 3 { - fmt.println("Got more than 3 null chunks in a row. Quitting parse.") + when VERBOSE do fmt.println("Got more than 3 null chunks in a row. 
Quitting parse.") break } if data_reached { - fmt.println("Data reached.") + when VERBOSE do fmt.println("Data reached.") } } @@ -202,13 +211,13 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { } } - fmt.printf("\n") - tab(indent) + when VERBOSE do fmt.printf("\n") + when VERBOSE do tab(indent) element := doc.elements[element_id] if element.kind == .Element { - fmt.printf("<%v>", element.ident) + when VERBOSE do fmt.printf("<%v>", element.ident) if len(element.value) > 0 { value := element.value[0] @@ -301,18 +310,18 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { for value in element.value { switch v in value { case string: - fmt.printf(": %v", v) + when VERBOSE do fmt.printf(": %v", v) case xml.Element_ID: xml_recurse(doc, v, file, naming_channel, interleave_set, allocator, indent + 1) } } for attr in element.attribs { - tab(indent + 1) - fmt.printf("[Attr] %v: %v\n", attr.key, attr.val) + when VERBOSE do tab(indent + 1) + when VERBOSE do fmt.printf("[Attr] %v: %v\n", attr.key, attr.val) } } else if element.kind == .Comment { - fmt.printf("[COMMENT] %v\n", element.value) + when VERBOSE do fmt.printf("[COMMENT] %v\n", element.value) } return @@ -382,15 +391,21 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { } } head := 0 - fmt.printf("Description: \n%v\n", string(temp_bext[head:256])) + when VERBOSE do fmt.printf("Description: \n%v\n", string(temp_bext[head:256])) head += 256 - fmt.printf("Originator: %v\n", string(temp_bext[head:head+32])) + when VERBOSE do fmt.printf("Originator: %v\n", string(temp_bext[head:head+32])) head += 32 - fmt.printf("Originator Reference: %v\n", string(temp_bext[head:head+32])) + when VERBOSE do fmt.printf("Originator Reference: %v\n", string(temp_bext[head:head+32])) head += 32 - fmt.printf("Origination Date: %v\n", string(temp_bext[head:head+10])) + date := string(temp_bext[head:head+10]) + when VERBOSE do fmt.printf("Origination Date: %v\n", date) + date_splits := strings.split(date, "-") + file.date.year, _ = strconv.parse_int(date_splits[0]) + file.date.month, _ = strconv.parse_int(date_splits[1]) + file.date.day, _ = strconv.parse_int(date_splits[2]) + delete(date_splits) head += 10 - fmt.printf("Origination Time: %v\n", string(temp_bext[head:head+8])) + when VERBOSE do fmt.printf("Origination Time: %v\n", string(temp_bext[head:head+8])) head += 8 file.samples_since_midnight = read_little_endian_u64(temp_bext[head:head+8]) @@ -400,19 +415,19 @@ read_wav :: proc(path : string, allocator:=context.allocator) -> (Wav, bool) { file.timecode.minute = u8((seconds_since_midnight % 3600) / 60) file.timecode.second = u8( seconds_since_midnight % 60) file.timecode.frame = f32( f64(file.samples_since_midnight % u64(file.sample_rate) ) * f64(file.tc_framerate) / f64(file.sample_rate)) - fmt.printf("Time Reference: %v (Samples since midnight, source of timecode)\n", file.samples_since_midnight) - fmt.printf(" %v seconds + %v samples\n", seconds_since_midnight, file.samples_since_midnight % u64(file.sample_rate)) + when VERBOSE do fmt.printf("Time Reference: %v (Samples since midnight, source of timecode)\n", file.samples_since_midnight) + when VERBOSE do fmt.printf(" %v seconds + %v samples\n", seconds_since_midnight, file.samples_since_midnight % u64(file.sample_rate)) head += 8 - fmt.printf("Version: %v\n", read_little_endian_u16(temp_bext[head:head+2])) + when VERBOSE do fmt.printf("Version: %v\n", read_little_endian_u16(temp_bext[head:head+2])) head += 2 - 
fmt.printf("UMID Skipped.\n") + when VERBOSE do fmt.printf("UMID Skipped.\n") head += 64 - fmt.printf("Skipped reserved nothingness.\n") + when VERBOSE do fmt.printf("Skipped reserved nothingness.\n") head += 190 - fmt.printf("Coding history:\n%v\n", string(temp_bext[head:])) + when VERBOSE do fmt.printf("Coding history:\n%v\n", string(temp_bext[head:])) } - fmt.println() + when VERBOSE do fmt.println() // just here to make some printing prettier temp_bext = nil diff --git a/src/wav/xml/debug_print.odin b/src/wav/xml/debug_print.odin new file mode 100644 index 0000000..9c47e79 --- /dev/null +++ b/src/wav/xml/debug_print.odin @@ -0,0 +1,86 @@ +package encoding_xml + +/* + An XML 1.0 / 1.1 parser + + Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>. + Made available under Odin's license. + + A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). + + List of contributors: + Jeroen van Rijn: Initial implementation. +*/ + + +import "core:io" +import "core:fmt" + +/* + Just for debug purposes. +*/ +print :: proc(writer: io.Writer, doc: ^Document) -> (written: int, err: io.Error) { + if doc == nil { return } + written += fmt.wprintf(writer, "[XML Prolog]\n") + + for attr in doc.prologue { + written += fmt.wprintf(writer, "\t%v: %v\n", attr.key, attr.val) + } + + written += fmt.wprintf(writer, "[Encoding] %v\n", doc.encoding) + + if len(doc.doctype.ident) > 0 { + written += fmt.wprintf(writer, "[DOCTYPE] %v\n", doc.doctype.ident) + + if len(doc.doctype.rest) > 0 { + fmt.wprintf(writer, "\t%v\n", doc.doctype.rest) + } + } + + for comment in doc.comments { + written += fmt.wprintf(writer, "[Pre-root comment] %v\n", comment) + } + + if len(doc.elements) > 0 { + fmt.wprintln(writer, " --- ") + print_element(writer, doc, 0) + fmt.wprintln(writer, " --- ") + } + + return written, .None +} + +print_element :: proc(writer: io.Writer, doc: ^Document, element_id: Element_ID, indent := 0) -> (written: int, err: io.Error) { + tab :: proc(writer: io.Writer, indent: int) { + for _ in 0..=indent { + fmt.wprintf(writer, "\t") + } + } + + tab(writer, indent) + + element := doc.elements[element_id] + + if element.kind == .Element { + fmt.wprintf(writer, "<%v>\n", element.ident) + + for value in element.value { + switch v in value { + case string: + tab(writer, indent + 1) + fmt.wprintf(writer, "[Value] %v\n", v) + case Element_ID: + print_element(writer, doc, v, indent + 1) + } + } + + for attr in element.attribs { + tab(writer, indent + 1) + fmt.wprintf(writer, "[Attr] %v: %v\n", attr.key, attr.val) + } + } else if element.kind == .Comment { + fmt.wprintf(writer, "[COMMENT] %v\n", element.value) + } + + return written, .None +} diff --git a/src/wav/xml/doc.odin b/src/wav/xml/doc.odin new file mode 100644 index 0000000..9030cd4 --- /dev/null +++ b/src/wav/xml/doc.odin @@ -0,0 +1,23 @@ +/* +A parser for a useful subset of the `XML` specification. + +A from-scratch `XML` implementation, loosely modelled on the [[ spec; https://www.w3.org/TR/2006/REC-xml11-20060816 ]]. + +Features: +- Supports enough of the XML 1.0/1.1 spec to handle the 99.9% of XML documents in common current usage. +- Simple to understand and use. Small. + +Caveats: +- We do NOT support HTML in this package, as that may or may not be valid XML. + If it works, great. If it doesn't, that's not considered a bug. + +- We do NOT support `UTF-16`. If you have a `UTF-16` XML file, please convert it to `UTF-8` first. Also, our condolences. 
+- `<[!ELEMENT` and `<[!ATTLIST` are not supported, and will be either ignored or return an error depending on the parser options. + +MAYBE: +- XML writer? +- Serialize/deserialize Odin types? + +For a full example, see: [[ core/encoding/xml/example; https://github.com/odin-lang/Odin/tree/master/core/encoding/xml/example ]] +*/ +package encoding_xml diff --git a/src/wav/xml/helpers.odin b/src/wav/xml/helpers.odin new file mode 100644 index 0000000..79f2d72 --- /dev/null +++ b/src/wav/xml/helpers.odin @@ -0,0 +1,52 @@ +package encoding_xml + +/* + An XML 1.0 / 1.1 parser + + Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>. + Made available under Odin's license. + + This file contains helper functions. +*/ + + +// Find parent's nth child with a given ident. +find_child_by_ident :: proc(doc: ^Document, parent_id: Element_ID, ident: string, nth := 0) -> (res: Element_ID, found: bool) { + tag := doc.elements[parent_id] + + count := 0 + for v in tag.value { + switch child_id in v { + case string: continue + case Element_ID: + child := doc.elements[child_id] + /* + Skip commments. They have no name. + */ + if child.kind != .Element { continue } + + /* + If the ident matches and it's the nth such child, return it. + */ + if child.ident == ident { + if count == nth { return child_id, true } + count += 1 + } + } + + } + return 0, false +} + +// Find an attribute by key. +find_attribute_val_by_key :: proc(doc: ^Document, parent_id: Element_ID, key: string) -> (val: string, found: bool) { + tag := doc.elements[parent_id] + + for attr in tag.attribs { + /* + If the ident matches, we're done. There can only ever be one attribute with the same name. + */ + if attr.key == key { return attr.val, true } + } + return "", false +} diff --git a/src/wav/xml/tokenizer.odin b/src/wav/xml/tokenizer.odin new file mode 100644 index 0000000..f4c9c8a --- /dev/null +++ b/src/wav/xml/tokenizer.odin @@ -0,0 +1,415 @@ +package encoding_xml + +/* + An XML 1.0 / 1.1 parser + + Copyright 2021-2022 Jeroen van Rijn <nom@duclavier.com>. + Made available under Odin's license. + + A from-scratch XML implementation, loosely modeled on the [spec](https://www.w3.org/TR/2006/REC-xml11-20060816). + + List of contributors: + Jeroen van Rijn: Initial implementation. +*/ + + +import "core:fmt" +import "core:unicode" +import "core:unicode/utf8" +import "core:strings" + +Error_Handler :: #type proc(pos: Pos, fmt: string, args: ..any) + +Token :: struct { + kind: Token_Kind, + text: string, + pos: Pos, +} + +Pos :: struct { + file: string, + offset: int, // starting at 0 + line: int, // starting at 1 + column: int, // starting at 1 +} + +Token_Kind :: enum { + Invalid, + + Ident, + Literal, + Rune, + String, + + Double_Quote, // " + Single_Quote, // ' + Colon, // : + + Eq, // = + Lt, // < + Gt, // > + Exclaim, // ! + Question, // ? + Hash, // # + Slash, // / + Dash, // - + + Open_Bracket, // [ + Close_Bracket, // ] + + EOF, +} + +CDATA_START :: "<![CDATA[" +CDATA_END :: "]]>" + +COMMENT_START :: "<!--" +COMMENT_END :: "-->" + +Tokenizer :: struct { + // Immutable data + path: string, + src: string, + err: Error_Handler, + + // Tokenizing state + ch: rune, + offset: int, + read_offset: int, + line_offset: int, + line_count: int, + + // Mutable data + error_count: int, +} + +init :: proc(t: ^Tokenizer, src: string, path: string, err: Error_Handler = default_error_handler) { + t.src = src + t.err = err + t.ch = ' ' + t.offset = 0 + t.read_offset = 0 + t.line_offset = 0 + t.line_count = len(src) > 0 ? 
1 : 0 + t.error_count = 0 + t.path = path + + advance_rune(t) + if t.ch == utf8.RUNE_BOM { + advance_rune(t) + } +} + +@(private) +offset_to_pos :: proc(t: ^Tokenizer, offset: int) -> Pos { + line := t.line_count + column := offset - t.line_offset + 1 + + return Pos { + file = t.path, + offset = offset, + line = line, + column = column, + } +} + +default_error_handler :: proc(pos: Pos, msg: string, args: ..any) { + fmt.eprintf("%s(%d:%d) ", pos.file, pos.line, pos.column) + fmt.eprintf(msg, ..args) + fmt.eprintf("\n") +} + +error :: proc(t: ^Tokenizer, offset: int, msg: string, args: ..any) { + pos := offset_to_pos(t, offset) + if t.err != nil { + t.err(pos=pos, fmt=msg, args=args) + } + t.error_count += 1 +} + +@(optimization_mode="favor_size") +advance_rune :: proc(t: ^Tokenizer) { + #no_bounds_check { + /* + Already bounds-checked here. + */ + if t.read_offset < len(t.src) { + t.offset = t.read_offset + if t.ch == '\n' { + t.line_offset = t.offset + t.line_count += 1 + } + r, w := rune(t.src[t.read_offset]), 1 + switch { + case r == 0: + //error(t, t.offset, "illegal character NUL") + case r >= utf8.RUNE_SELF: + r, w = #force_inline utf8.decode_rune_in_string(t.src[t.read_offset:]) + if r == utf8.RUNE_ERROR && w == 1 { + //error(t, t.offset, "illegal UTF-8 encoding") + } else if r == utf8.RUNE_BOM && t.offset > 0 { + //error(t, t.offset, "illegal byte order mark") + } + } + t.read_offset += w + t.ch = r + } else { + t.offset = len(t.src) + if t.ch == '\n' { + t.line_offset = t.offset + t.line_count += 1 + } + t.ch = -1 + } + } +} + +peek_byte :: proc(t: ^Tokenizer, offset := 0) -> byte { + if t.read_offset+offset < len(t.src) { + #no_bounds_check return t.src[t.read_offset+offset] + } + return 0 +} + +@(optimization_mode="favor_size") +skip_whitespace :: proc(t: ^Tokenizer) { + for { + switch t.ch { + case ' ', '\t', '\r', '\n': + advance_rune(t) + case: + return + } + } +} + +@(optimization_mode="favor_size") +is_letter :: proc(r: rune) -> bool { + if r < utf8.RUNE_SELF { + switch r { + case '_': + return true + case 'A'..='Z', 'a'..='z': + return true + } + } + return unicode.is_letter(r) +} + +is_valid_identifier_rune :: proc(r: rune) -> bool { + if r < utf8.RUNE_SELF { + switch r { + case '_', '-', ':': return true + case 'A'..='Z', 'a'..='z': return true + case '0'..='9': return true + case -1: return false + } + } + + if unicode.is_letter(r) || unicode.is_digit(r) { + return true + } + return false +} + +scan_identifier :: proc(t: ^Tokenizer) -> string { + offset := t.offset + namespaced := false + + for is_valid_identifier_rune(t.ch) { + advance_rune(t) + if t.ch == ':' { + // A namespaced attr can have at most two parts, `namespace:ident`. + if namespaced { + break + } + namespaced = true + } + } + return string(t.src[offset : t.offset]) +} + +/* + A comment ends when we see -->, preceded by a character that's not a dash. + "For compatibility, the string "--" (double-hyphen) must not occur within comments." + + See: https://www.w3.org/TR/2006/REC-xml11-20060816/#dt-comment + + Thanks to the length (4) of the comment start, we also have enough lookback, + and the peek at the next byte asserts that there's at least one more character + that's a `>`. 
+*/ +scan_comment :: proc(t: ^Tokenizer) -> (comment: string, err: Error) { + offset := t.offset + + for { + advance_rune(t) + ch := t.ch + + if ch < 0 { + //error(t, offset, "[parse] Comment was not terminated\n") + return "", .Unclosed_Comment + } + + if string(t.src[t.offset - 1:][:2]) == "--" { + if peek_byte(t) == '>' { + break + } else { + //error(t, t.offset - 1, "Invalid -- sequence in comment.\n") + return "", .Invalid_Sequence_In_Comment + } + } + } + + expect(t, .Dash) + expect(t, .Gt) + + return string(t.src[offset : t.offset - 1]), .None +} + +// Skip CDATA +skip_cdata :: proc(t: ^Tokenizer) -> (err: Error) { + if s := string(t.src[t.offset:]); !strings.has_prefix(s, CDATA_START) { + return .None + } + + t.read_offset += len(CDATA_START) + offset := t.offset + + cdata_scan: for { + advance_rune(t) + if t.ch < 0 { + //error(t, offset, "[scan_string] CDATA was not terminated\n") + return .Premature_EOF + } + + // Scan until the end of a CDATA tag. + if s := string(t.src[t.read_offset:]); strings.has_prefix(s, CDATA_END) { + t.read_offset += len(CDATA_END) + break cdata_scan + } + } + return .None +} + +@(optimization_mode="favor_size") +scan_string :: proc(t: ^Tokenizer, offset: int, close: rune = '<', consume_close := false, multiline := true) -> (value: string, err: Error) { + err = .None + + loop: for { + ch := t.ch + + switch ch { + case -1: + //error(t, t.offset, "[scan_string] Premature end of file.\n") + return "", .Premature_EOF + + case '<': + if peek_byte(t) == '!' { + if peek_byte(t, 1) == '[' { + // Might be the start of a CDATA tag. + skip_cdata(t) or_return + } else if peek_byte(t, 1) == '-' && peek_byte(t, 2) == '-' { + // Comment start. Eat comment. + t.read_offset += 3 + _ = scan_comment(t) or_return + } + } + + case '\n': + if !multiline { + //error(t, offset, string(t.src[offset : t.offset])) + //error(t, offset, "[scan_string] Not terminated\n") + err = .Invalid_Tag_Value + break loop + } + } + + if t.ch == close { + // If it's not a CDATA or comment, it's the end of this body. + break loop + } + advance_rune(t) + } + + // Strip trailing whitespace. 
+ lit := string(t.src[offset : t.offset]) + + end := len(lit) + eat: for ; end > 0; end -= 1 { + ch := lit[end - 1] + switch ch { + case ' ', '\t', '\r', '\n': + case: + break eat + } + } + lit = lit[:end] + + if consume_close { + advance_rune(t) + } + return lit, err +} + +peek :: proc(t: ^Tokenizer) -> (token: Token) { + old := t^ + token = scan(t) + t^ = old + return token +} + +scan :: proc(t: ^Tokenizer, multiline_string := false) -> Token { + skip_whitespace(t) + + offset := t.offset + + kind: Token_Kind + err: Error + lit: string + pos := offset_to_pos(t, offset) + + switch ch := t.ch; true { + case is_letter(ch): + lit = scan_identifier(t) + kind = .Ident + + case: + advance_rune(t) + switch ch { + case -1: + kind = .EOF + + case '<': kind = .Lt + case '>': kind = .Gt + case '!': kind = .Exclaim + case '?': kind = .Question + case '=': kind = .Eq + case '#': kind = .Hash + case '/': kind = .Slash + case '-': kind = .Dash + case ':': kind = .Colon + case '[': kind = .Open_Bracket + case ']': kind = .Close_Bracket + + case '"', '\'': + kind = .Invalid + + lit, err = scan_string(t, t.offset, ch, true, multiline_string) + if err == .None { + kind = .String + } + + case '\n': + lit = "\n" + + case: + kind = .Invalid + } + } + + if kind != .String && lit == "" { + lit = string(t.src[offset : t.offset]) + } + return Token{kind, lit, pos} +} diff --git a/src/wav/xml/xml_reader.odin b/src/wav/xml/xml_reader.odin new file mode 100644 index 0000000..c19cbf6 --- /dev/null +++ b/src/wav/xml/xml_reader.odin @@ -0,0 +1,628 @@ +package encoding_xml +/* + An XML 1.0 / 1.1 parser + + 2021-2022 Jeroen van Rijn <nom@duclavier.com>. + available under Odin's license. + + List of contributors: + - Jeroen van Rijn: Initial implementation. +*/ + +import "core:bytes" +import "core:encoding/entity" +import "base:intrinsics" +import "core:mem" +import "core:os" +import "core:strings" +import "base:runtime" + +likely :: intrinsics.expect + +DEFAULT_OPTIONS :: Options{ + flags = {.Ignore_Unsupported}, + expected_doctype = "", +} + +Option_Flag :: enum { + // If the caller says that input may be modified, we can perform in-situ parsing. + // If this flag isn't provided, the XML parser first duplicates the input so that it can. + Input_May_Be_Modified, + + // Document MUST start with `<?xml` prologue. + Must_Have_Prolog, + + // Document MUST have a `<!DOCTYPE`. + Must_Have_DocType, + + // By default we skip comments. Use this option to intern a comment on a parented Element. + Intern_Comments, + + // How to handle unsupported parts of the specification, like <! other than <!DOCTYPE and <![CDATA[ + Error_on_Unsupported, + Ignore_Unsupported, + + // By default CDATA tags are passed-through as-is. + // This option unwraps them when encountered. + Unbox_CDATA, + + // By default SGML entities like `>`, ` ` and ` ` are passed-through as-is. + // This option decodes them when encountered. + Decode_SGML_Entities, + + // If a tag body has a comment, it will be stripped unless this option is given. + Keep_Tag_Body_Comments, +} +Option_Flags :: bit_set[Option_Flag; u16] + +Document :: struct { + elements: [dynamic]Element `fmt:"v,element_count"`, + element_count: Element_ID, + + prologue: Attributes, + encoding: Encoding, + + doctype: struct { + // We only scan the <!DOCTYPE IDENT part and skip the rest. + ident: string, + rest: string, + }, + + // If we encounter comments before the root node, and the option to intern comments is given, this is where they'll live. + // Otherwise they'll be in the element tree. 
+ comments: [dynamic]string `fmt:"-"`, + + // Internal + tokenizer: ^Tokenizer `fmt:"-"`, + allocator: mem.Allocator `fmt:"-"`, + + // Input. Either the original buffer, or a copy if `.Input_May_Be_Modified` isn't specified. + input: []u8 `fmt:"-"`, + strings_to_free: [dynamic]string `fmt:"-"`, +} + +Element :: struct { + ident: string, + value: [dynamic]Value, + attribs: Attributes, + + kind: enum { + Element = 0, + Comment, + }, + parent: Element_ID, +} + +Value :: union { + string, + Element_ID, +} + +Attribute :: struct { + key: string, + val: string, +} + +Attributes :: [dynamic]Attribute + +Options :: struct { + flags: Option_Flags, + expected_doctype: string, +} + +Encoding :: enum { + Unknown, + + UTF_8, + ISO_8859_1, + + // Aliases + LATIN_1 = ISO_8859_1, +} + +Error :: enum { + // General return values. + None = 0, + General_Error, + Unexpected_Token, + Invalid_Token, + + // Couldn't find, open or read file. + File_Error, + + // File too short. + Premature_EOF, + + // XML-specific errors. + No_Prolog, + Invalid_Prolog, + Too_Many_Prologs, + + No_DocType, + Too_Many_DocTypes, + DocType_Must_Preceed_Elements, + + // If a DOCTYPE is present _or_ the caller + // asked for a specific DOCTYPE and the DOCTYPE + // and root tag don't match, we return `.Invalid_DocType`. + Invalid_DocType, + + Invalid_Tag_Value, + Mismatched_Closing_Tag, + + Unclosed_Comment, + Comment_Before_Root_Element, + Invalid_Sequence_In_Comment, + + Unsupported_Version, + Unsupported_Encoding, + + // <!FOO are usually skipped. + Unhandled_Bang, + + Duplicate_Attribute, + Conflicting_Options, +} + +parse_bytes :: proc(data: []u8, options := DEFAULT_OPTIONS, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) { + data := data + context.allocator = allocator + + opts := validate_options(options) or_return + + // If `.Input_May_Be_Modified` is not specified, we duplicate the input so that we can modify it in-place. + if .Input_May_Be_Modified not_in opts.flags { + data = bytes.clone(data) + } + + t := new(Tokenizer) + init(t, string(data), path, error_handler) + + doc = new(Document) + doc.allocator = allocator + doc.tokenizer = t + doc.input = data + + doc.elements = make([dynamic]Element, 1024, 1024, allocator) + + err = .Unexpected_Token + element, parent: Element_ID + open: Token + + // If a DOCTYPE is present, the root tag has to match. + // If an expected DOCTYPE is given in options (i.e. it's non-empty), the DOCTYPE (if present) and root tag have to match. + expected_doctype := options.expected_doctype + + loop: for { + skip_whitespace(t) + switch t.ch { + case '<': + // Consume peeked `<` + advance_rune(t) + + open = scan(t) + // NOTE(Jeroen): We're not using a switch because this if-else chain ordered by likelihood is 2.5% faster at -o:size and -o:speed. + if likely(open.kind, Token_Kind.Ident) == .Ident { + // e.g. <odin - Start of new element. + element = new_element(doc) + if element == 0 { // First Element + parent = element + } else { + append(&doc.elements[parent].value, element) + } + + doc.elements[element].parent = parent + doc.elements[element].ident = open.text + + parse_attributes(doc, &doc.elements[element].attribs) or_return + + // If a DOCTYPE is present _or_ the caller + // asked for a specific DOCTYPE and the DOCTYPE + // and root tag don't match, we return .Invalid_Root_Tag. + if element == 0 { // Root tag? + if len(expected_doctype) > 0 && expected_doctype != open.text { + //error(t, t.offset, "Root Tag doesn't match DOCTYPE. 
Expected: %v, got: %v\n", expected_doctype, open.text) + return doc, .Invalid_DocType + } + } + + // One of these should follow: + // - `>`, which means we've just opened this tag and expect a later element to close it. + // - `/>`, which means this is an 'empty' or self-closing tag. + end_token := scan(t) + #partial switch end_token.kind { + case .Gt: + // We're now the new parent. + parent = element + + case .Slash: + // Empty tag. Close it. + expect(t, .Gt) or_return + parent = doc.elements[element].parent + element = parent + + case: + //error(t, t.offset, "Expected close tag, got: %#v\n", end_token) + return + } + + } else if open.kind == .Slash { + // Close tag. + ident := expect(t, .Ident) or_return + _ = expect(t, .Gt) or_return + + if doc.elements[element].ident != ident.text { + //error(t, t.offset, "Mismatched Closing Tag. Expected %v, got %v\n", doc.elements[element].ident, ident.text) + return doc, .Mismatched_Closing_Tag + } + parent = doc.elements[element].parent + element = parent + + } else if open.kind == .Exclaim { + // <! + next := scan(t) + #partial switch next.kind { + case .Ident: + switch next.text { + case "DOCTYPE": + if len(doc.doctype.ident) > 0 { + return doc, .Too_Many_DocTypes + } + if doc.element_count > 0 { + return doc, .DocType_Must_Preceed_Elements + } + parse_doctype(doc) or_return + + if len(expected_doctype) > 0 && expected_doctype != doc.doctype.ident { + //error(t, t.offset, "Invalid DOCTYPE. Expected: %v, got: %v\n", expected_doctype, doc.doctype.ident) + return doc, .Invalid_DocType + } + expected_doctype = doc.doctype.ident + + case: + if .Error_on_Unsupported in opts.flags { + //error(t, t.offset, "Unhandled: <!%v\n", next.text) + return doc, .Unhandled_Bang + } + skip_element(t) or_return + } + + case .Dash: + // Comment: <!-- -->. + // The grammar does not allow a comment to end in ---> + expect(t, .Dash) + comment := scan_comment(t) or_return + + if .Intern_Comments in opts.flags { + if len(doc.elements) == 0 { + append(&doc.comments, comment) + } else { + el := new_element(doc) + doc.elements[el].parent = element + doc.elements[el].kind = .Comment + append(&doc.elements[el].value, comment) + append(&doc.elements[element].value, el) + } + } + + case .Open_Bracket: + // This could be a CDATA tag part of a tag's body. Unread the `<![` + t.offset -= 3 + + // Instead of calling `parse_body` here, we could also `continue loop` + // and fall through to the `case:` at the bottom of the outer loop. + // This makes the intent clearer. + parse_body(doc, element, opts) or_return + + case: + //error(t, t.offset, "Unexpected Token after <!: %#v", next) + } + + } else if open.kind == .Question { + // <?xml + next := scan(t) + #partial switch next.kind { + case .Ident: + if len(next.text) == 3 && strings.equal_fold(next.text, "xml") { + parse_prologue(doc) or_return + } else if len(doc.prologue) > 0 { + // We've already seen a prologue. + return doc, .Too_Many_Prologs + } else { + // Could be `<?xml-stylesheet`, etc. Ignore it. + skip_element(t) or_return + } + case: + //error(t, t.offset, "Expected \"<?xml\", got \"<?%v\".", next.text) + return + } + + } else { + //error(t, t.offset, "Invalid Token after <: %#v\n", open) + return + } + + case -1: + // End of file. + break loop + + case: + // This should be a tag's body text. 
+ parse_body(doc, element, opts) or_return + } + } + + if .Must_Have_Prolog in opts.flags && len(doc.prologue) == 0 { + return doc, .No_Prolog + } + + if .Must_Have_DocType in opts.flags && len(doc.doctype.ident) == 0 { + return doc, .No_DocType + } + + resize(&doc.elements, int(doc.element_count)) + return doc, .None +} + +parse_string :: proc(data: string, options := DEFAULT_OPTIONS, path := "", error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) { + _data := transmute([]u8)data + + return parse_bytes(_data, options, path, error_handler, allocator) +} + +parse :: proc { parse_string, parse_bytes } + +// Load an XML file +load_from_file :: proc(filename: string, options := DEFAULT_OPTIONS, error_handler := default_error_handler, allocator := context.allocator) -> (doc: ^Document, err: Error) { + context.allocator = allocator + options := options + + data, data_ok := os.read_entire_file(filename) + if !data_ok { return {}, .File_Error } + + options.flags += { .Input_May_Be_Modified } + + return parse_bytes(data, options, filename, error_handler, allocator) +} + +destroy :: proc(doc: ^Document) { + if doc == nil { return } + + for el in doc.elements { + delete(el.attribs) + delete(el.value) + } + delete(doc.elements) + + delete(doc.prologue) + delete(doc.comments) + delete(doc.input) + + for s in doc.strings_to_free { + delete(s) + } + delete(doc.strings_to_free) + + free(doc.tokenizer) + free(doc) +} + +/* + Helpers. +*/ + +validate_options :: proc(options: Options) -> (validated: Options, err: Error) { + validated = options + + if .Error_on_Unsupported in validated.flags && .Ignore_Unsupported in validated.flags { + return options, .Conflicting_Options + } + return validated, .None +} + +expect :: proc(t: ^Tokenizer, kind: Token_Kind, multiline_string := false) -> (tok: Token, err: Error) { + tok = scan(t, multiline_string=multiline_string) + if tok.kind == kind { return tok, .None } + + //error(t, t.offset, "Expected \"%v\", got \"%v\".", kind, tok.kind) + return tok, .Unexpected_Token +} + +parse_attribute :: proc(doc: ^Document) -> (attr: Attribute, offset: int, err: Error) { + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + key := expect(t, .Ident) or_return + _ = expect(t, .Eq) or_return + value := expect(t, .String, multiline_string=true) or_return + + normalized, normalize_err := entity.decode_xml(value.text, {.Normalize_Whitespace}, doc.allocator) + if normalize_err == .None { + append(&doc.strings_to_free, normalized) + value.text = normalized + } + + attr.key = key.text + attr.val = value.text + + err = .None + return +} + +check_duplicate_attributes :: proc(t: ^Tokenizer, attribs: Attributes, attr: Attribute, offset: int) -> (err: Error) { + for a in attribs { + if attr.key == a.key { + //error(t, offset, "Duplicate attribute: %v\n", attr.key) + return .Duplicate_Attribute + } + } + return .None +} + +parse_attributes :: proc(doc: ^Document, attribs: ^Attributes) -> (err: Error) { + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + for peek(t).kind == .Ident { + attr, offset := parse_attribute(doc) or_return + check_duplicate_attributes(t, attribs^, attr, offset) or_return + append(attribs, attr) + } + skip_whitespace(t) + return .None +} + +parse_prologue :: proc(doc: ^Document) -> (err: Error) { + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + offset := t.offset + parse_attributes(doc, &doc.prologue) or_return + + for attr in 
doc.prologue { + switch attr.key { + case "version": + switch attr.val { + case "1.0", "1.1": + case: + //error(t, offset, "[parse_prologue] Warning: Unhandled XML version: %v\n", attr.val) + } + + case "encoding": + runtime.DEFAULT_TEMP_ALLOCATOR_TEMP_GUARD() + switch strings.to_lower(attr.val, context.temp_allocator) { + case "utf-8", "utf8": + doc.encoding = .UTF_8 + + case "latin-1", "latin1", "iso-8859-1": + doc.encoding = .LATIN_1 + + case: + // Unrecognized encoding, assume UTF-8. + //error(t, offset, "[parse_prologue] Warning: Unrecognized encoding: %v\n", attr.val) + } + + case: + // Ignored. + } + } + + _ = expect(t, .Question) or_return + _ = expect(t, .Gt) or_return + + return .None +} + +skip_element :: proc(t: ^Tokenizer) -> (err: Error) { + close := 1 + + loop: for { + tok := scan(t) + #partial switch tok.kind { + case .EOF: + //error(t, t.offset, "[skip_element] Premature EOF\n") + return .Premature_EOF + + case .Lt: + close += 1 + + case .Gt: + close -= 1 + if close == 0 { + break loop + } + + case: + + } + } + return .None +} + +parse_doctype :: proc(doc: ^Document) -> (err: Error) { + /* + <!DOCTYPE greeting SYSTEM "hello.dtd"> + + <!DOCTYPE greeting [ + <!ELEMENT greeting (#PCDATA)> + ]> + */ + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + tok := expect(t, .Ident) or_return + doc.doctype.ident = tok.text + + skip_whitespace(t) + offset := t.offset + skip_element(t) or_return + + // -1 because the current offset is that of the closing tag, so the rest of the DOCTYPE tag ends just before it. + doc.doctype.rest = string(t.src[offset : t.offset - 1]) + return .None +} + +parse_body :: proc(doc: ^Document, element: Element_ID, opts: Options) -> (err: Error) { + assert(doc != nil) + context.allocator = doc.allocator + t := doc.tokenizer + + body_text := scan_string(t, t.offset) or_return + needs_processing := .Unbox_CDATA in opts.flags + needs_processing |= .Decode_SGML_Entities in opts.flags + + if !needs_processing { + append(&doc.elements[element].value, body_text) + return + } + + decode_opts := entity.XML_Decode_Options{} + if .Keep_Tag_Body_Comments not_in opts.flags { + decode_opts += { .Comment_Strip } + } + + if .Decode_SGML_Entities not_in opts.flags { + decode_opts += { .No_Entity_Decode } + } + + if .Unbox_CDATA in opts.flags { + decode_opts += { .Unbox_CDATA } + if .Decode_SGML_Entities in opts.flags { + decode_opts += { .Decode_CDATA } + } + } + + decoded, decode_err := entity.decode_xml(body_text, decode_opts) + if decode_err == .None { + append(&doc.elements[element].value, decoded) + append(&doc.strings_to_free, decoded) + } else { + append(&doc.elements[element].value, body_text) + } + + return +} + +Element_ID :: u32 + +new_element :: proc(doc: ^Document) -> (id: Element_ID) { + element_space := len(doc.elements) + + // Need to resize + if int(doc.element_count) + 1 > element_space { + if element_space < 65536 { + element_space *= 2 + } else { + element_space += 65536 + } + resize(&doc.elements, element_space) + } + + cur := doc.element_count + doc.element_count += 1 + return cur +}
\ No newline at end of file
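A closing note on the vendored XML package: wav.odin feeds the iXML chunk through it and walks the resulting element tree. A hedged usage sketch, assuming the vendored path under src/ (the document snippet and element names below are illustrative, not taken from a real recorder file):

package example

import "core:fmt"
import "wav/xml" // the parser vendored in this commit

main :: proc() {
    doc, err := xml.parse_string(`<BWFXML><SPEED><TIMECODE_RATE>25/1</TIMECODE_RATE></SPEED></BWFXML>`)
    if err != .None {
        fmt.println("parse failed:", err)
        return
    }
    defer xml.destroy(doc)

    // Element 0 is the document root; find_child_by_ident walks one level down.
    if speed, found := xml.find_child_by_ident(doc, 0, "SPEED"); found {
        if rate, rate_found := xml.find_child_by_ident(doc, speed, "TIMECODE_RATE"); rate_found {
            // An element's text content sits in its value list alongside child IDs.
            for v in doc.elements[rate].value {
                if s, is_text := v.(string); is_text {
                    fmt.println("timecode rate:", s)
                }
            }
        }
    }
}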