334
301
// Checked exception type declared by this parser; parseLine() lists it in
// its throws clause. It adds no state or behaviour of its own beyond what
// Exception provides. The "serial" warning is suppressed because the class
// declares no serialVersionUID.
// NOTE(review): presumably thrown to abandon the contact currently being
// parsed and carry on with the next one -- confirm against the throw sites.
@SuppressWarnings("serial")
335
302
protected class SkipContactException extends Exception { }
337
public void parseLine( ByteBuffer buffer )
304
public void parseLine( String line )
338
305
throws ParseException, SkipContactException,
339
306
AbortImportException
341
// get a US-ASCII version of the line for processing
344
line = new String( buffer.array(), buffer.position(),
345
buffer.limit() - buffer.position(), "US-ASCII" );
347
catch( UnsupportedEncodingException e ) {
348
// we know US-ASCII is supported, so appease the compiler...
352
308
// ignore empty lines
353
if( line.trim().equals( "" ) ) return;
309
if( line.trim() == "" ) return;
355
311
// split line into name and value parts (this may turn out to be
356
312
// unwanted if the line is a subsequent line in a multi-line
357
313
// value, but we have to do this now to check for and handle VCF
358
// versions first). Also, the value part is only created tentatively
359
// because it may have an encoding/charset. Since we're treating it
360
// as UTF-8 (which is compatible with 7-bit US-ASCII) this is ok
361
// though so long as we later use the raw bytes. Also we check for
362
// malformed name:value pairs.
363
String name_and_params, string_value;
365
String[] parts = line.split( ":", 2 );
366
if( parts.length == 2 ) {
367
name_and_params = parts[ 0 ].trim();
368
string_value = parts[ 1 ].trim();
369
if( name_and_params.length() == 0 )
370
throw new ParseException( R.string.error_vcf_malformed );
374
if( !_parser_in_multiline )
375
throw new ParseException( R.string.error_vcf_malformed );
376
name_and_params = null;
315
String[] props = line.split( ":", 2 );
316
for( int i = 0; i < props.length; i++ )
317
props[ i ] = props[ i ].trim();
381
319
// if we haven't yet got a version, we won't be parsing anything!
382
320
if( _version == null )
384
322
// is this a version?
385
if( name_and_params.equals( "VERSION" ) )
323
if( props.length == 2 && props[ 0 ].equals( "VERSION" ) )
387
325
// yes, check/store it
388
if( !string_value.equals( "2.1" ) &&
389
!string_value.equals( "3.0" ) )
326
if( !props[ 1 ].equals( "2.1" ) &&
327
!props[ 1 ].equals( "3.0" ) )
390
328
throw new ParseException( R.string.error_vcf_version );
391
_version = string_value;
329
_version = props[ 1 ];
393
// parse any other buffers we've accumulated so far
394
if( _buffers != null )
395
for( int i = 0; i < _buffers.size(); i++ )
396
parseLine( _buffers.get( i ) );
331
// parse any other lines we've accumulated so far
333
for( int i = 0; i < _lines.size(); i++ )
334
parseLine( _lines.get( i ) );
401
// no, so stash this buffer till we have a version
402
if( _buffers == null )
403
_buffers = new Vector< ByteBuffer >();
404
_buffers.add( buffer );
339
// no, so stash this line till we have a version
341
_lines = new Vector< String >();
409
// value bytes, for processing
412
347
if( _parser_in_multiline )
414
349
// if we're currently in a multi-line value, use the stored
415
350
// property name and parameters
416
name_and_params = _parser_current_name_and_params;
418
// find start of string (skip spaces/tabs)
419
int pos = buffer.position();
420
byte[] buffer_array = buffer.array();
421
while( pos < buffer.limit() && (
422
buffer_array[ pos ] == ' ' ||
423
buffer_array[ pos ] == '\t' ) )
428
// get value from buffer
429
value = ByteBuffer.wrap( buffer.array(), pos,
430
buffer.limit() - pos );
351
props = new String[ 2 ];
352
props[ 0 ] = _parser_current_name_and_params;
353
props[ 1 ] = line.trim();
434
// ignore empty values
435
if( string_value.length() < 1 ) return;
437
// calculate how many chars to skip from beginning of line
438
// so we skip the property "name:" part
439
int pos = buffer.position() + name_and_params.length() + 1;
441
// get value from buffer
442
value = ByteBuffer.wrap( buffer.array(), pos,
443
buffer.limit() - pos );
357
// for normal lines, check the property name/value bits
358
if( props.length < 2 || props[ 0 ].length() == 0 )
359
throw new ParseException(
360
R.string.error_vcf_malformed );
362
// ignore empty properties
363
if( props[ 1 ].length() < 1 )
445
366
// reset the saved multi-line state
446
_parser_current_name_and_params = name_and_params;
367
_parser_current_name_and_params = props[ 0 ];
447
368
_parser_buffered_value_so_far = "";
450
371
// get parameter parts
451
String[] name_param_parts = name_and_params.split( ";", -1 );
452
for( int i = 0; i < name_param_parts.length; i++ )
453
name_param_parts[ i ] = name_param_parts[ i ].trim();
372
String[] params = props[ 0 ].split( ";" );
373
for( int i = 0; i < params.length; i++ )
374
params[ i ] = params[ i ].trim();
455
// parse encoding parameter
456
String encoding = checkParam( name_param_parts, "ENCODING" );
457
if( encoding != null ) encoding = encoding.toUpperCase();
458
if( encoding != null && !encoding.equals( "8BIT" ) &&
459
!encoding.equals( "QUOTED-PRINTABLE" ) )
376
// parse charset and encoding parameters
377
String charset, encoding;
378
if( ( charset = checkParam( params, "CHARSET" ) ) != null &&
379
!charset.equals( "UTF-8" ) && !charset.equals( "UTF-16" ) )
381
throw new ParseException( R.string.error_vcf_charset );
383
if( ( encoding = checkParam( params, "ENCODING" ) ) != null &&
384
!encoding.equals( "QUOTED-PRINTABLE" ) &&
385
!encoding.equals( "8BIT" ) )
460
386
//&& !encoding.equals( "BASE64" ) )
462
388
throw new ParseException( R.string.error_vcf_encoding );
465
// parse charset parameter
466
String charset = checkParam( name_param_parts, "CHARSET" );
467
if( charset != null ) charset = charset.toUpperCase();
468
if( charset != null && !charset.equals( "US-ASCII" ) &&
469
!charset.equals( "ASCII" ) && !charset.equals( "UTF-8" ) )
471
throw new ParseException( R.string.error_vcf_charset );
474
391
// do unencoding (or default to a fake unencoding result with
475
392
// the raw string)
476
UnencodeResult unencoding_result = null;
393
UnencodeResult result;
477
394
if( encoding != null && encoding.equals( "QUOTED-PRINTABLE" ) )
478
unencoding_result = unencodeQuotedPrintable( value );
395
result = unencodeQuotedPrintable( props[ 1 ], charset );
479
396
// else if( encoding != null && encoding.equals( "BASE64" ) )
480
397
// result = unencodeBase64( props[ 1 ], charset );
481
if( unencoding_result != null ) {
482
value = unencoding_result.getBuffer();
483
_parser_in_multiline =
484
unencoding_result.isAnotherLineRequired();
487
// convert 8-bit ASCII charset to UTF-8
488
if( charset == null || charset.equals( "ASCII" ) ) {
489
value = transcodeAsciiToUtf8( value );
399
result = new UnencodeResult( false, props[ 1 ].getBytes(),
400
props[ 1 ].getBytes().length );
493
402
// process charset
496
new String( value.array(), value.position(),
497
value.limit() - value.position(), charset );
404
props[ 1 ] = new String( result.getBytes(), 0,
405
result.getNumBytes(),
406
charset == null? "UTF-8" : charset );
498
407
} catch( UnsupportedEncodingException e ) {
499
408
throw new ParseException( R.string.error_vcf_charset );
502
411
// handle multi-line requests
412
_parser_in_multiline = result.isAnotherLineRequired();
503
413
if( _parser_in_multiline ) {
504
_parser_buffered_value_so_far += string_value;
414
_parser_buffered_value_so_far += props[ 1 ];
508
418
// add on buffered multi-line content
509
String complete_value =
510
_parser_buffered_value_so_far + string_value;
419
String value = _parser_buffered_value_so_far + props[ 1 ];
512
421
// parse some properties
513
if( name_param_parts[ 0 ].equals( "N" ) )
514
parseN( name_param_parts, complete_value );
515
else if( name_param_parts[ 0 ].equals( "FN" ) )
516
parseFN( name_param_parts, complete_value );
517
else if( name_param_parts[ 0 ].equals( "ORG" ) )
518
parseORG( name_param_parts, complete_value );
519
else if( name_param_parts[ 0 ].equals( "TEL" ) )
520
parseTEL( name_param_parts, complete_value );
521
else if( name_param_parts[ 0 ].equals( "EMAIL" ) )
522
parseEMAIL( name_param_parts, complete_value );
422
if( params[ 0 ].equals( "N" ) )
423
parseN( params, value );
424
else if( params[ 0 ].equals( "FN" ) )
425
parseFN( params, value );
426
else if( params[ 0 ].equals( "ORG" ) )
427
parseORG( params, value );
428
else if( params[ 0 ].equals( "TEL" ) )
429
parseTEL( params, value );
430
else if( params[ 0 ].equals( "EMAIL" ) )
431
parseEMAIL( params, value );
665
570
private Set< String > extractTypes( String[] params,
666
List< String > valid_types )
571
List< String > validTypes )
668
573
HashSet< String > types = new HashSet< String >();
670
575
// get 3.0-style TYPE= param
672
if( ( type_param = checkParam( params, "TYPE" ) ) != null ) {
673
String[] parts = type_param.split( "," );
674
for( int i = 0; i < parts.length; i++ )
675
if( valid_types.contains( parts[ i ] ) )
676
types.add( parts[ i ] );
577
if( ( typeParam = checkParam( params, "TYPE" ) ) != null ) {
578
String[] bits = typeParam.split( "," );
579
for( int i = 0; i < bits.length; i++ )
580
if( validTypes.contains( bits[ i ] ) )
581
types.add( bits[ i ] );
679
584
// get 2.1-style type param
680
585
if( _version.equals( "2.1" ) ) {
681
586
for( int i = 1; i < params.length; i++ )
682
if( valid_types.contains( params[ i ] ) )
587
if( validTypes.contains( params[ i ] ) )
683
588
types.add( params[ i ] );
689
private UnencodeResult unencodeQuotedPrintable( ByteBuffer in )
594
private UnencodeResult unencodeQuotedPrintable( String str, String charset )
691
596
boolean another = false;
598
// default encoding scheme
599
if( charset == null ) charset = "UTF-8";
693
601
// unencode quoted-printable encoding, as per RFC1521 section 5.1
694
byte[] out = new byte[ in.limit() - in.position() ];
602
byte[] bytes = new byte[ str.length() ];
696
for( int i = in.position(); i < in.limit(); i++ )
604
for( int i = 0; i < str.length(); i++ )
698
606
// get next char and process...
699
byte ch = in.array()[ i ];
700
if( ch == '=' && i < in.limit() - 2 )
607
char ch = str.charAt( i );
608
if( ch == '=' && i < str.length() - 2 )
702
610
// we found a =XX format byte, add it
704
Character.digit( in.array()[ i + 1 ], 16 ) * 16 +
705
Character.digit( in.array()[ i + 2 ], 16 ) );
612
Character.digit( str.charAt( i + 1 ), 16 ) * 16 +
613
Character.digit( str.charAt( i + 2 ), 16 ) );
708
else if( ch == '=' && i == in.limit() - 1 )
616
else if( ch == '=' && i == str.length() - 1 )
710
618
// we found a '=' at the end of a line signifying a multi-
711
619
// line string, so we don't add it.