import os
import re
import sys

# Start of a BibTeX entry: "@type{" or "@type(".
_ENTRY_START_RE = re.compile(r'@(\w+)\s*[{(]', re.IGNORECASE)
# Entry header anchored at the start of an entry, consuming the opening
# delimiter and any whitespace before the citation key.
_ENTRY_HEADER_RE = re.compile(r'@(\w+)\s*[{(]\s*')
# "name =" prefix of a field; hyphens are allowed so names such as
# "date-added" do not abort parsing of the remaining fields.
_FIELD_RE = re.compile(r'([\w-]+)\s*=\s*')
# Characters whose presence makes escape_yaml_string() quote the scalar.
_YAML_SPECIAL_CHARS = ':\'"{}[],&*#?|-<>=!%@`\\\n'


def remove_comments(text):
    """Return *text* without lines whose first non-blank character is ``%``."""
    return re.sub(r'(?m)^[ \t]*%.*\n?', '', text)


def find_entries(bibtex_content):
    """Split *bibtex_content* into raw entry strings.

    Each entry runs from one ``@type{`` / ``@type(`` marker up to the next
    marker (or the end of the text) and is stripped of surrounding whitespace.
    Returns an empty list when no marker is found.
    """
    starts = [m.start() for m in _ENTRY_START_RE.finditer(bibtex_content)]
    ends = starts[1:] + [len(bibtex_content)]
    return [bibtex_content[s:e].strip() for s, e in zip(starts, ends)]


def _read_field_value(body, pos):
    """Read one field value from *body* starting at *pos*.

    Handles brace-delimited, double-quoted and bare values.
    Returns ``(value, new_pos)`` where *new_pos* is just past the value.
    """
    if pos >= len(body):
        # "name =" at the very end of the entry: no value to read.
        return '', pos

    opener = body[pos]
    if opener == '{':
        depth = 1
        start = pos + 1
        pos = start
        while pos < len(body) and depth > 0:
            ch = body[pos]
            if ch == '{':
                depth += 1
            elif ch == '}':
                depth -= 1
            pos += 1
        # Drop the closing brace only if one was actually found; an
        # unterminated value keeps all of its characters.
        end = pos - 1 if depth == 0 else pos
        return body[start:end], pos

    if opener == '"':
        start = pos + 1
        pos = start
        while pos < len(body) and body[pos] != '"':
            # A backslash escapes the following character (e.g. \").
            pos += 2 if body[pos] == '\\' else 1
        return body[start:pos], pos + 1

    # Bare value (number or macro name): runs to the next comma or newline.
    start = pos
    while pos < len(body) and body[pos] not in ',\n':
        pos += 1
    return body[start:pos].strip(), pos


def parse_bibtex_entry(entry):
    """Parse one raw BibTeX entry.

    Returns ``(entry_type, entry_key, fields)`` with *entry_type* lower-cased
    and *fields* a dict of field name -> value (newlines inside values are
    replaced by spaces). Returns ``None`` for empty input, ``%`` comments,
    ``@comment`` / ``@preamble`` / ``@string`` entries, and text that does
    not look like an entry or has no citation key.
    """
    entry = entry.strip()
    if not entry or entry.startswith('%'):
        return None

    header = _ENTRY_HEADER_RE.match(entry)
    if not header:
        return None
    entry_type = header.group(1).lower()
    if entry_type in ('comment', 'preamble', 'string'):
        return None

    inner = entry[header.end():].rstrip()
    # Remove only the single delimiter that closes the entry. Stripping every
    # trailing brace would also eat the brace that closes the last field value.
    if inner.endswith(('}', ')')):
        inner = inner[:-1]

    raw_key, _, body = inner.partition(',')
    entry_key = raw_key.strip()
    if not entry_key:
        return None
    body = body.strip()

    fields = {}
    pos = 0
    while pos < len(body):
        m = _FIELD_RE.match(body, pos)
        if not m:
            break
        value, pos = _read_field_value(body, m.end())
        fields[m.group(1)] = value.replace('\n', ' ').strip()
        # Skip the separator and whitespace before the next field.
        while pos < len(body) and body[pos] in ', \n\r\t':
            pos += 1
    return entry_type, entry_key, fields


def escape_yaml_string(s):
    """Return *s* as a YAML scalar, double-quoting it when necessary.

    Empty strings become ``""``. Strings containing any YAML-significant
    character are wrapped in double quotes with backslashes, double quotes
    and newlines escaped; everything else is returned unchanged.
    """
    if not s:
        return '""'
    if any(c in _YAML_SPECIAL_CHARS for c in s):
        # Backslashes first so the escapes added afterwards are not doubled.
        # Newlines are escaped because a raw line break inside a
        # double-quoted scalar is folded into a space by YAML parsers.
        s = s.replace('\\', '\\\\').replace('"', '\\"').replace('\n', '\\n')
        return f'"{s}"'
    return s


def dict_to_yaml(d, indent=0):
    """Render the flat mapping *d* as YAML lines indented by *indent* spaces.

    List values become block sequences indented two further spaces; all
    other values are emitted inline via :func:`escape_yaml_string`.
    """
    pad = ' ' * indent
    lines = []
    for k, v in d.items():
        if isinstance(v, list):
            lines.append(f"{pad}{k}:")
            lines.extend(f"{pad}  - {escape_yaml_string(item)}" for item in v)
        else:
            lines.append(f"{pad}{k}: {escape_yaml_string(v)}")
    return '\n'.join(lines)


def bibtex_to_yaml(bibtex_content):
    """Convert BibTeX source text to a YAML document.

    The result is a top-level ``publications`` list with one mapping per
    entry: the citation key under ``key``, ``author`` split on " and " into
    a list, then the remaining fields in source order.
    """
    chunks = ["publications:\n"]
    for entry in find_entries(remove_comments(bibtex_content)):
        parsed = parse_bibtex_entry(entry)
        if not parsed:
            continue
        _entry_type, entry_key, fields = parsed
        entry_dict = {'key': entry_key}
        if "author" in fields:
            entry_dict['author'] = [
                a.strip() for a in re.split(r'\s+and\s+', fields.pop("author"))
            ]
        # NOTE: a BibTeX field literally named "key" overwrites the citation key.
        entry_dict.update(fields)
        # With indent=4, replacing the first line's padding by "  - " puts the
        # first key at column 4, aligned with every following key.
        chunks.append("  - " + dict_to_yaml(entry_dict, indent=4).lstrip() + "\n")
    return ''.join(chunks)


def main():
    """Command-line entry point: convert ``<bibfile>`` to a sibling ``.yaml`` file."""
    if len(sys.argv) != 2:
        print(f"Usage: {sys.argv[0]} <bibfile>")
        sys.exit(1)
    bibfile = sys.argv[1]
    with open(bibfile, 'r', encoding='utf-8') as f:
        bibtex_content = f.read()
    yaml_content = bibtex_to_yaml(bibtex_content)
    # splitext only looks at the final path component, so dots in directory
    # names (e.g. "./refs" or "my.dir/refs") are left alone.
    yamlfile = os.path.splitext(bibfile)[0] + '.yaml'
    with open(yamlfile, 'w', encoding='utf-8') as f:
        f.write(yaml_content)
    print(f"YAML exported to {yamlfile}")


if __name__ == "__main__":
    main()