/android/import-contacts

To get this branch, use:
bzr branch http://bzr.ed.am/android/import-contacts

« back to all changes in this revision

Viewing changes to src/org/waxworlds/edam/importcontacts/VCFImporter.java

  • Committer: edam
  • Date: 2011-03-19 20:33:09 UTC
  • Revision ID: edam@waxworlds.org-20110319203309-5dzfyqrxwk94jtin
- formatting: removed some double-indents on overrunning lines
- updated TODO and NEWS
- rewrote central logic of parser so it makes more sense, looks nicer and has a small optimisation (getting name and params from line only when necessary)
- optimised away the unnecessary multiple conversions of lines to US-ASCII
- re-wrote line extraction from vCards so that we can look ahead for v3 folded lines
- added support for v3 folded lines

Show diffs side-by-side

added added

removed removed

32
32
import java.io.IOException;
33
33
import java.io.UnsupportedEncodingException;
34
34
import java.nio.ByteBuffer;
35
 
import java.util.ArrayList;
36
35
import java.util.Arrays;
37
36
import java.util.HashSet;
38
37
import java.util.Iterator;
39
38
import java.util.List;
40
 
import java.util.NoSuchElementException;
41
39
import java.util.Set;
42
40
import java.util.Vector;
43
41
import java.util.regex.Matcher;
44
42
import java.util.regex.Pattern;
 
43
import java.util.NoSuchElementException;
 
44
import java.lang.UnsupportedOperationException;
45
45
 
46
46
import android.content.SharedPreferences;
47
47
import android.provider.Contacts;
334
334
                private final static int NAMELEVEL_FN = 2;
335
335
                private final static int NAMELEVEL_N = 3;
336
336
 
337
 
                private final static int MULTILINE_NONE = 0;
338
 
                private final static int MULTILINE_ENCODED = 1; // v2.1 quoted-printable
339
 
                private final static int MULTILINE_ESCAPED = 2; // v2.1 \\CRLF
340
 
                private final static int MULTILINE_FOLDED = 3;  // v3.0 folding
341
 
 
342
337
                private String _version = null;
343
338
                private Vector< ByteBuffer > _buffers = null;
344
339
                private int _name_level = NAMELEVEL_NONE;
345
 
                private int _parser_multiline_state = MULTILINE_NONE;
 
340
                private boolean _parser_in_encoded_multiline = false;
 
341
                private boolean _parser_in_folded_multiline = false;
346
342
                private String _parser_current_name_and_params = null;
347
343
                private String _parser_buffered_value_so_far = "";
348
344
 
473
469
                                String name_and_params;
474
470
                                int pos;
475
471
 
476
 
                                if( _parser_multiline_state != MULTILINE_NONE )
 
472
                                if( _parser_in_encoded_multiline ||
 
473
                                        _parser_in_folded_multiline )
477
474
                                {
478
475
                                        // if we're currently in a multi-line value, use the stored
479
476
                                        // property name and parameters
480
477
                                        name_and_params = _parser_current_name_and_params;
481
478
 
482
 
                                        // skip some initial line characters, depending on the type
483
 
                                        // of multi-line we're handling
484
479
                                        pos = buffer.position();
485
 
                                        switch( _parser_multiline_state )
486
 
                                        {
487
 
                                        case MULTILINE_FOLDED:
 
480
 
 
481
                                        // for folded multi-lines, skip the single space at the
 
482
                                        // start of the next line
 
483
                                        if( _parser_in_folded_multiline )
488
484
                                                pos++;
489
 
                                                break;
490
 
                                        case MULTILINE_ENCODED:
 
485
 
 
486
                                        // else, this must be an encoded multi-line, so skip any
 
487
                                        // whitespace we find at the start of the next line
 
488
                                        else
491
489
                                                while( pos < buffer.limit() && (
492
490
                                                        buffer.get( pos ) == ' ' ||
493
491
                                                        buffer.get( pos ) == '\t' ) )
494
492
                                                {
495
493
                                                        pos++;
496
494
                                                }
497
 
                                                break;
498
 
                                        default:
499
 
                                                // do nothing
500
 
                                        }
501
 
 
502
 
                                        // take us out of multi-line so that we can re-detect that
503
 
                                        // this line is a multi-line or not
504
 
                                        _parser_multiline_state = MULTILINE_NONE;
505
495
                                }
506
496
                                else
507
497
                                {
562
552
//                                      unencoding_result = unencodeBase64( props[ 1 ], charset );
563
553
                                if( unencoding_result != null ) {
564
554
                                        value = unencoding_result.getBuffer();
565
 
                                        if( unencoding_result.isAnotherLineRequired() )
566
 
                                                _parser_multiline_state = MULTILINE_ENCODED;
 
555
                                        _parser_in_encoded_multiline =
 
556
                                                unencoding_result.isAnotherLineRequired();
567
557
                                }
568
558
 
569
559
                                // convert 8-bit ASCII charset to US-ASCII
581
571
                                        throw new ParseException( R.string.error_vcf_charset );
582
572
                                }
583
573
 
584
 
                                // for some entries that have semicolon-separated value parts,
585
 
                                // check to see if the value ends in an escape character, which
586
 
                                // indicates that we have a multi-line value
587
 
                                if( ( name_param_parts[ 0 ].equals( "N" ) ||
588
 
                                        name_param_parts[ 0 ].equals( "ORG" ) ||
589
 
                                        name_param_parts[ 0 ].equals( "ADR" ) ) &&
590
 
                                        doesStringEndInAnEscapeChar( string_value ) )
591
 
                                {
592
 
                                        _parser_multiline_state = MULTILINE_ESCAPED;
593
 
                                        string_value = string_value.substring( 0,
594
 
                                                string_value.length() - 1 );
595
 
                                }
596
 
 
597
574
                                // now we know whether we're in an encoding multi-line,
598
575
                                // determine if we're in a v3 folded multi-line or not
599
 
                                if( _parser_multiline_state == MULTILINE_NONE &&
600
 
                                        _version.equals( "3.0" ) && next_line_looks_folded )
 
576
                                _parser_in_folded_multiline = !_parser_in_encoded_multiline &&
 
577
                                        _version.equals( "3.0" ) && next_line_looks_folded;
 
578
 
 
579
                                // handle multi-line requests
 
580
                                if( _parser_in_encoded_multiline ||
 
581
                                        _parser_in_folded_multiline )
601
582
                                {
602
 
                                        _parser_multiline_state = MULTILINE_FOLDED;
603
 
                                }
604
 
 
605
 
                                // handle multi-lines by buffering them and parsing them when we
606
 
                                // are processing the last line in a multi-line sequence
607
 
                                if( _parser_multiline_state != MULTILINE_NONE ) {
608
583
                                        _parser_buffered_value_so_far += string_value;
609
584
                                        return;
610
585
                                }
 
586
 
 
587
                                // add on buffered multi-line content
611
588
                                String complete_value =
612
 
                                        ( _parser_buffered_value_so_far + string_value ).trim();
 
589
                                        _parser_buffered_value_so_far + string_value;
613
590
 
614
591
                                // ignore empty values
615
592
                                if( complete_value.length() < 1 ) return;
625
602
                                        parseTEL( name_param_parts, complete_value );
626
603
                                else if( name_param_parts[ 0 ].equals( "EMAIL" ) )
627
604
                                        parseEMAIL( name_param_parts, complete_value );
628
 
                                else if( name_param_parts[ 0 ].equals( "ADR" ) )
629
 
                                        parseADR( name_param_parts, complete_value );
630
 
                        }
631
 
                }
632
 
 
633
 
                private boolean doesStringEndInAnEscapeChar( String string )
634
 
                {
635
 
                        // count the number of backslashes at the end of the string
636
 
                        int count = 0;
637
 
                        for( int a = string.length() - 1; a >= 0; a-- )
638
 
                                if( string.charAt( a ) == '\\' )
639
 
                                        count++;
640
 
                                else
641
 
                                        break;
642
 
 
643
 
                        // if there are an even number of backslashes then the final one
644
 
                        // doesn't count
645
 
                        return ( count & 1 ) == 1;
646
 
                }
647
 
 
648
 
                private String[] splitValueBySemicolon( String value )
649
 
                {
650
 
                        // split string in to parts by semicolon
651
 
                        ArrayList< String > parts = new ArrayList< String >(
652
 
                                Arrays.asList( value.split(  ";" ) ) );
653
 
 
654
 
                        // go through parts
655
 
                        for( int a = 0; a < parts.size(); a++ )
656
 
                        {
657
 
                                String str = parts.get( a );
658
 
 
659
 
                                // look for parts that end in an escape character, but ignore
660
 
                                // the final part. We've already detected escape chars at the
661
 
                                // end of the final part in parseLine() and handled multi-lines
662
 
                                // accordingly.
663
 
                                if( a < parts.size() - 1 &&
664
 
                                        doesStringEndInAnEscapeChar( str ) )
665
 
                                {
666
 
                                        // join the next part to this part and remove the next part
667
 
                                        parts.set( a, str.substring( 0, str.length() - 1 ) +
668
 
                                                ';' + parts.get( a + 1 ) );
669
 
                                        parts.remove( a + 1 );
670
 
 
671
 
                                        // re-visit this part
672
 
                                        a--;
673
 
                                        continue;
674
 
                                }
675
 
 
676
 
                                // trim and replace string
677
 
                                str = str.trim();
678
 
                                parts.set( a, str );
679
 
                        }
680
 
 
681
 
                        String[] ret = new String[ parts.size() ];
682
 
                        return parts.toArray( ret );
 
605
                        }
683
606
                }
684
607
 
685
608
                private void parseN( String[] params, String value )
690
613
                        if( _name_level >= NAMELEVEL_N ) return;
691
614
 
692
615
                        // get name parts
693
 
                        String[] name_parts = splitValueBySemicolon( value );
 
616
                        String[] name_parts = value.split( ";" );
 
617
                        for( int i = 0; i < name_parts.length; i++ )
 
618
                                name_parts[ i ] = name_parts[ i ].trim();
694
619
 
695
620
                        // build name
696
621
                        value = "";
727
652
                        if( _name_level >= NAMELEVEL_ORG ) return;
728
653
 
729
654
                        // get org parts
730
 
                        String[] org_parts = splitValueBySemicolon( value );
 
655
                        String[] org_parts = value.split( ";" );
 
656
                        for( int i = 0; i < org_parts.length; i++ )
 
657
                                org_parts[ i ] = org_parts[ i ].trim();
731
658
 
732
659
                        // build name
733
660
                        if( org_parts.length > 1 && org_parts[ 0 ].length() == 0 )
734
661
                                value = org_parts[ 1 ];
735
 
                        else if( org_parts.length > 1 && org_parts[ 1 ].length() > 0 )
736
 
                                value = org_parts[ 0 ] + ", " + org_parts[ 1 ];
737
662
                        else
738
663
                                value = org_parts[ 0 ];
739
664
 
781
706
                        Set< String > types = extractTypes( params, Arrays.asList(
782
707
                                "PREF", "WORK", "HOME", "INTERNET" ) );
783
708
 
784
 
                        // add email address
 
709
                        // here's the logic...
785
710
                        boolean preferred = types.contains( "PREF" );
786
711
                        if( types.contains( "WORK" ) )
787
712
                                addEmail( value, Contacts.ContactMethods.TYPE_WORK, preferred );
789
714
                                addEmail( value, Contacts.ContactMethods.TYPE_HOME, preferred );
790
715
                }
791
716
 
792
 
                private void parseADR( String[] params, String value )
793
 
                        throws ParseException, SkipContactException
794
 
                {
795
 
                        // get address parts
796
 
                        String[] adr_parts = splitValueBySemicolon( value );
797
 
 
798
 
                        // build address
799
 
                        value = "";
800
 
                        for( int a = 0; a < adr_parts.length; a++ ) {
801
 
                                if( value.length() > 0 ) value += "\n";
802
 
                                value += adr_parts[ a ].trim();
803
 
                        }
804
 
 
805
 
                        Set< String > types = extractTypes( params, Arrays.asList(
806
 
                                "PREF", "WORK", "HOME", "INTERNET" ) );
807
 
 
808
 
                        // add address
809
 
                        if( types.contains( "WORK" ) )
810
 
                                addAddress( value, Contacts.ContactMethods.TYPE_WORK );
811
 
                        else
812
 
                                addAddress( value, Contacts.ContactMethods.TYPE_HOME);
813
 
                }
814
 
 
815
717
                public void finaliseParsing()
816
718
                        throws ParseException, SkipContactException,
817
719
                        AbortImportException